* Macros for producing Spatial Mismatch analysis file of displaced workers;
* Produced by Mark Kutzbach;

/* run step 0 cpr ------------------------------------------------------------------------ */
%macro lostjob_run0cpr;

    * pre-process extract CPR residences and separate into by-metro files;
    data cpr_ind (drop=ss ccc geocodefull)
         cpr_chi (drop=ss ccc geocodefull)
         cpr_det (drop=ss ccc geocodefull)
         cpr_col (drop=ss ccc geocodefull)
         cpr_cle (drop=ss ccc geocodefull)
         cpr_min (drop=ss ccc geocodefull)
         cpr_buf (drop=ss ccc geocodefull)
         cpr_pit (drop=ss ccc geocodefull)
         cpr_mil (drop=ss ccc geocodefull)
        ;
        length geocode $11 trct4 $9;
        set CPR.cpr_us_licf_1999 (keep=pik address_year geocodefull huid geo_src ss ccc)
            CPR.cpr_us_licf_2000 (keep=pik address_year geocodefull huid geo_src ss ccc)
            CPR.cpr_us_licf_2001 (keep=pik address_year geocodefull huid geo_src ss ccc)
            CPR.cpr_us_licf_2002 (keep=pik address_year geocodefull huid geo_src ss ccc)
            CPR.cpr_us_licf_2003 (keep=pik address_year geocodefull huid geo_src ss ccc)
            CPR.cpr_us_licf_2004 (keep=pik address_year geocodefull huid geo_src ss ccc)
            ;
        %let metord = &metord_start.;
        %do %until("%scan(&metrolist.,&metord.)"="");
        %let met=%scan(&metrolist.,&metord.);
        %let st=%scan(&stlist.,&metord.);
        %let stcode=%scan(&stcodelist.,&metord.);
        %let cty=%scan(%quote(&countylist.),&metord.,V);        
        %let cpr_where=(input(ss,2.)=&stcode.) and (ccc in (&cty.));
        if (&cpr_where.) then do;
            geocode=substr(geocodefull,1,11);
            trct4=substr(geocodefull,1,9);
            output cpr_&met.;
        end;
        %let metord=%eval(&metord+1);
        %end;
    run;
    
    * provide tract lookup file for tracts with incomplete precision;
    * some tracts in 2004 only have 4 complete digits (SSCCCTTTTTT);
    data trct4list;
        length trct4 $9;
        set GEO.intpts_trct (keep=trct);
        trct4=substr(trct,1,9);
    run;
    data trct4list;
        set trct4list;
        by trct4;
        if (first.trct4) then output;
    run;
        
    * cycle through each city, choosing the appropriate state, state codes, and counties;
    %let metord = &metord_start.;
    %do %until("%scan(&metrolist.,&metord.)"="");
        %let met=%scan(&metrolist.,&metord.);
        %let st=%scan(&stlist.,&metord.);
        %let stcode=%scan(&stcodelist.,&metord.);
        %let cty=%scan(%quote(&countylist.),&metord.,V);
        %put Process CPR for &met. &st. &cty.;                                           

        * sort to supplement tracts that are outdated;
        proc sort data=cpr_&met.;
            by geocode;
        run;
        
        * merge with updated tracts;
        data cpr_&met.;
            merge cpr_&met. (in=in_cpr)
                  GEO.intpts_trct (in=in_trct keep=trct rename=(trct=geocode));
            by geocode;
            if (in_cpr);
            * indicator for match success;
            trct_match=0;
            if (in_trct) then trct_match=1;
        run;
        data cpr_&met.;
            merge cpr_&met. (in=in_cpr)
                  trct4list (in=in_trct4);
            by trct4;
            if (in_cpr);
            * replace geocode if alternative;
            if ((trct_match=0) and (in_trct4)) then geocode=trct;
            if ((trct_match=0) and (in_trct4 ne 1)) then trct_match=9;
            drop trct trct4;
        run;

        * sort for merge, will use twice later;
        proc sort data=cpr_&met.;
            by pik address_year descending geo_src;
        run;
            
        * in the case of duplicates, keep only one (happens in 2001, CPR not unique that year);
        data OUTDATA.cpr_&met.;
            set cpr_&met.;
            by pik address_year;
            if (first.address_year);
        run;

        * summary;
        proc freq data=OUTDATA.cpr_&met.;
            title "summary of year and 4 digit tract substitution for &met.";
            table address_year*trct_match;
        run;
        proc contents data=OUTDATA.cpr_&met.;
        run;

        %let metord=%eval(&metord+1);
    %end;                           

%mend lostjob_run0cpr;


/* run step 0 census ------------------------------------------------------------------ */
%macro lostjob_run0cen;

    * pre-process census short form before using with metros;
    proc sort data=CENSHORT.pik_per 
            (keep=pik qsex qage qspanx qracex puid 
             where=((pik ne "") and (qage ge "015") and (qage le "080")))
              out=OUTDATA.census_short_sortpik;
         by pik;
    run;
    proc print data=OUTDATA.census_short_sortpik;
    run;
    * sort by pik for use in identifying households by puid;
    proc sort data=CENSHORT.pik_per
              (keep=pik qage qrel puid)
              out=OUTDATA.census_short_sortpuid;
        by puid pik;
    run;
    * check duplicates (removed later in step 1);
    proc sort data=OUTDATA.census_short_sortpik out=pik_per_nodup nodupkey;
        by pik;
    run;
                
%mend lostjob_run0cen;


/* run step 0 tract --------------------------------------------------------------------------- */
%macro lostjob_run0tract;

    * pre-process tract characteristics before using with metros;
    %let metord = &metord_start.;
    * cycle through each city, choosing the appropriate state, state codes, and counties;
    %do %until("%scan(&metrolist.,&metord.)"="");
        %let met=%scan(&metrolist.,&metord.);
        %let st=%scan(&stlist.,&metord.);
        %let stcode=%scan(&stcodelist.,&metord.);
        %let cty=%scan(%quote(&countylist.),&metord.,V);
        %put Process aggregate tract info for &met. &st. &cty.;
        
        * see census2000.sas;
        %tract_census;          

        %let metord=%eval(&metord+1);
    %end;                           

%mend lostjob_run0tract;
                

/* run step 0 access --------------------------------------------------------------------------- */
%macro lostjob_run0access;

    * pre-process job accessibility components before using with metros;
    %let metord = &metord_start.;
    * cycle through each city, choosing the appropriate state, state codes, and counties;
    %do %until("%scan(&metrolist.,&metord.)"="");
        %let met=%scan(&metrolist.,&metord.);
        %let st=%scan(&stlist.,&metord.);
        %let stcode=%scan(&stcodelist.,&metord.);
        %let cty=%scan(%quote(&countylist.),&metord.,V);
        %let sep_year_start=%scan(&sep_year_start_list.,&metord.);
        %let sep_year_end=%scan(&sep_year_end_list.,&metord.);
        %put metro access for &met. &st. &cty.;

        * by year;
        %let sepyear=&sep_year_start.;
        %do %while("&sepyear." le "&sep_year_end.");        
            %put sepyear is &sepyear.;
        
            * see measure_access.sas;
            %tract_access(&sepyear.);
        
            * append years (base is working file to avoid appending to existing data);
            proc append base=attrib_otm_job_&met. data=OUTDATA.attrib_otm_job_&met._&sepyear.;
            run;
            proc append base=attrib_otm_lab_&met. data=OUTDATA.attrib_otm_lab_&met._&sepyear.;
            run;
            proc append base=compete_&met. data=OUTDATA.compete_&met._&sepyear. 
                                                (keep=d_stfid year indxx_auto_o_lab: rings_dist_o_lab:);
            run;
            proc append base=access_&met. data=OUTDATA.access_&met._&sepyear.;
            run;

            %let sepyear=%eval(&sepyear+1);
        %end;
            
        * output datasets, includes year variable;
        data OUTDATA.attrib_otm_job_&met.;
            set attrib_otm_job_&met.;
        run;
        data OUTDATA.attrib_otm_lab_&met.;
            set attrib_otm_lab_&met.;
        run;
        data OUTDATA.compete_&met.;
            set compete_&met.;
        run;
        data OUTDATA.access_&met.;
            set access_&met.;
        run;

        %let metord=%eval(&metord+1);
    %end;                     

%mend lostjob_run0access;
                
/* run step 0 modechoice --------------------------------------------------------------- */
%macro lostjob_run0modechoice;

    * pre-process mode choice model estimation;
    %let metord = &metord_start.;
    * cycle through each city, choosing the appropriate state, state codes, and counties;
    %do %until("%scan(&metrolist.,&metord.)"="");
        %let met=%scan(&metrolist.,&metord.);
        %let st=%scan(&stlist.,&metord.);
        %let stmet=&st.&met.;
        %put Prep for mode choice analysis for state &st., metro &met.;
                
        * aggregate proximity file to see if there are is no MPO transit data;
        data proxmeas;
            set OUTDATA.proxmeas_&met. (rename=(o_stfid=h_stfid));
            if predict_tran>0 then predict_tran=1;
        run;
        proc summary data=proxmeas;
            class h_stfid;
            var predict_tran;
            output out=proxmeassum (drop=_TYPE_ _FREQ_) 
                max(predict_tran)=max_predict_tran
                sum(predict_tran)=sum_predict_tran
                n(predict_tran)=count_routes;
        run;
        data proxmeassum;
            length stmet $5;
            merge proxmeassum (in=in_prox)
                  OUTDATA.census2000_&met. (in=in_cen keep=h_stfid com_pub_rate);
            by h_stfid;
            if (in_prox and in_cen);
            * designate metro area;
            stmet="&stmet.";
            * If share predicted or imputed is excessive, MPO data indicates minimal transit;
            share_predict_tran=sum_predict_tran/count_routes;
            if (share_predict_tran<0.9) then tract_transit_mpo=1;
            else tract_transit_mpo=0;
            * If very few residents use transit (usually as few as when no MPO transit data exists);
            if (com_pub_rate>0.05) then tract_transit_census=1;
            else tract_transit_census=0;
            label share_predict_tran="Share of possible routes where transit time is imputed"
                  tract_transit_mpo="Tract has valid MPO transit time for at least 10 percent or routes"
                  tract_transit_census="Tract at least 5 percent of Census 2000 commuters using transit"
            ;
        run;
        proc freq data=proxmeassum;
            title "Incidence of a tract being reasonable for transit use for metro &met.";
            table tract_transit_mpo*tract_transit_census /missing;
        run;
        data _null;
        if cexist("OUTDATA.proxgraphs.predtrancom"||compress(gindex)||".grseg") then do;
            call execute("proc catalog catalog=OUTDATA.proxgraphs;");
            call execute("delete predtrancom"||compress(gindex)||".grseg;");
            call execute("quit;");
        end;
        run;
        filename outgraph "/programs/projects/kutzb001/access/vintages/control_07/loglst/graphtran_&met..pdf";
        goptions reset=all device=pdfc gsfname=outgraph gsfmode=replace ftext="Helvetica";
        proc gplot data=proxmeassum gout=OUTDATA.proxgraphs;
            title "transit time prediction and commute shares for metro &met.";
            plot share_predict_tran*com_pub_rate /name="predtrancom";
            label share_predict_tran='share transit commutes imputed'
                  com_pub_rate='share commuters using public transit';
        run;
        proc means data=proxmeassum (where=(share_predict_tran>0.90));
            title "transit over 90 percent for metro &met.";
            var com_pub_rate;
        run;
        proc means data=proxmeassum (where=(share_predict_tran<=0.90));
            title "transit under 90 percent for metro &met.";
            var com_pub_rate;
        run;
        proc means data=proxmeassum (where=(com_pub_rate>0.05 and share_predict_tran<=0.90));
            title "transit under 90 percent and commuting over 5 percent for metro &met.";
            var share_predict_tran com_pub_rate;
        run;
        proc corr data=proxmeassum (where=(com_pub_rate>0.05 and share_predict_tran<=0.90));
            title "correlation transit predict and commute transit for metro &met.";
            var share_predict_tran com_pub_rate;
        run;

        * append metros (base is working file to avoid appending to existing data);
        proc append base=proxmeassum_all data=proxmeassum;
        run;

        %let metord=%eval(&metord+1);
    %end;

    * repeat quality checking for all cities together;
    proc sort data=proxmeassum_all out=OUTDATA.proxmeassum_all;
        by h_stfid;
    run;
    proc freq data=OUTDATA.proxmeassum_all;
        title "Incidence of a tract being reasonable for transit use for metro &met.";
        table tract_transit_mpo*tract_transit_census /missing;
    run;
    data _null;
    if cexist("OUTDATA.proxgraphs.predtrancom"||compress(gindex)||".grseg") then do;
        call execute("proc catalog catalog=OUTDATA.proxgraphs;");
        call execute("delete predtrancom"||compress(gindex)||".grseg;");
        call execute("quit;");
    end;
    run;
    filename outgraph "&dirprogvint./loglst/graphtran_all.pdf";
    goptions reset=all device=pdfc gsfname=outgraph gsfmode=replace ftext="Helvetica";
    proc gplot data=OUTDATA.proxmeassum_all gout=OUTDATA.proxgraphs;
        title "transit time prediction and commute shares for all";
        plot share_predict_tran*com_pub_rate /name="predtrancom";
        label share_predict_tran='share transit commutes imputed'
              com_pub_rate='share commuters using public transit';
    run;
    proc means data=OUTDATA.proxmeassum_all (where=(share_predict_tran>0.90));
        title "transit over 90 percent for all";
        var com_pub_rate;
    run;
    proc means data=OUTDATA.proxmeassum_all (where=(share_predict_tran<=0.90));
        title "transit under 90 percent for all";
        var com_pub_rate;
    run;
    proc means data=OUTDATA.proxmeassum_all (where=(com_pub_rate>0.05 and share_predict_tran<=0.90));
        title "transit under 90 percent and commuting over 5 percent for all";
        var share_predict_tran com_pub_rate;
    run;
    proc corr data=OUTDATA.proxmeassum_all (where=(com_pub_rate>0.05 and share_predict_tran<=0.90));
        title "correlation transit predict and commute transit for all";
        var share_predict_tran com_pub_rate;
    run;
    
    * initialize code macros that will be used below;
    %let metord = &metord_start.;
    * cycle through each city, choosing the appropriate state, state codes, and counties;
    %do %until("%scan(&metrolistmode.,&metord.)"="");
        %let met=%scan(&metrolistmode.,&metord.);
        %let st=%scan(&stlist.,&metord.);
        %let stmet=&st.&met.;
        %let stcode=%scan(&stcodelist.,&metord.);
        %let cty=%scan(%quote(&countylist.),&metord.,V);
        %put  &met. &st. &cty.;
                
        * modechoice.sas;
        %modechoice_setup;

        %let metord=%eval(&metord+1);
    %end;

    * modechoice.sas;
    %modechoice_est;
     
%mend lostjob_run0modechoice;


/* run step 1 ------------------------------------------------------------------------ */
%macro lostjob_run1;

    * load phf data;
    libname SNAP "&dirsnap./ehf/&st." access=readonly;

    * identify employment spells fitting separation and hiring patterns;
    data lostjob_phf_sep_sample (keep = pik sein sep_qtr earn_m:)
         lostjob_phf_hir_sample (keep = pik sein sep_qtr earn_p:)
        ;
        * define new variables (added 2 to earn_p to accomodate full quarter hires);
        length ehist $&emp_qtrs.
            emp_sep_pattern $%eval(&emp_qtr_span.+1+&sep_qtr_span.)
            emp_hir_pattern_0 $%eval(&emp_qtr_span.+1)
            emp_hir_pattern_1 $%eval(&emp_qtr_span.+2)
            emp_hir_pattern_2 $%eval(&emp_qtr_span.+3)
            emp_hir_pattern_3 $%eval(&emp_qtr_span.+4)
            emp_hir_pattern_4 $%eval(&emp_qtr_span.+5)
            earn_m0-earn_m%eval(&emphist_span.) 5.
            earn_p0-earn_p%eval(&sep_qtr_span.+2) 5.
            e_sein_&emp_qtr_start.-e_sein_&emp_qtr_end. 5.
            ;
        * retain variables needed for mn processing because multiple spells for units;
        retain e_sein_&emp_qtr_start.-e_sein_&emp_qtr_end.;
        * load earnings data and fill into arrays (could also use phf_b), add 2 for full qtr earnings;
        set SNAP.ehf_&st._phf (keep=pik sein e&emp_qtr_start.-e%eval(&emp_qtr_end.+2));
        array emp_array(&emp_qtr_start.:&emp_qtr_end.) e&emp_qtr_start.-e&emp_qtr_end.;
        array emp_sein_array(&emp_qtr_start.:&emp_qtr_end.) e_sein_&emp_qtr_start.-e_sein_&emp_qtr_end.;
        * fill in first spell earnings to sein total;
        by pik sein;
        if (first.sein) then do;
            do x=&emp_qtr_start. to &emp_qtr_end.;
                emp_sein_array(x)=0;
            end;
        end;
        * add subsequent spells to quarterly earnings of initial spell;
        do x=&emp_qtr_start. to &emp_qtr_end.;
            if (emp_array(x) ne .) then emp_sein_array(x)=emp_sein_array(x)+emp_array(x);
        end;
        * process all filled in job histories;
        if (last.sein) then do;
            * use sein complete earnings across all spells;
            do x=&emp_qtr_start. to &emp_qtr_end.;
                emp_array(x)=emp_sein_array(x);
            end;
            * record employment history as string 111110000 with separation in last 1;
            ehist =repeat('0',&emp_qtrs.);
            do x=&emp_qtr_start. to &emp_qtr_end.;
                if (emp_array(x) > 0) then substr(ehist,(x+1)-&emp_qtr_start.,1) = '1';
            end;
            * match employment history to desired pattern (first repeat is implied, so = num + 1);
            emp_sep_pattern=repeat('1',&emp_qtr_span.)||'00000000';
            emp_hir_pattern_0=repeat('0',&emp_qtr_span.-1)||'1';
            emp_hir_pattern_1=repeat('0',&emp_qtr_span.-1)||'01';
            emp_hir_pattern_2=repeat('0',&emp_qtr_span.-1)||'001';
            emp_hir_pattern_3=repeat('0',&emp_qtr_span.-1)||'0001';
            emp_hir_pattern_4=repeat('0',&emp_qtr_span.-1)||'00001';
            emp_hir_pattern_5=repeat('0',&emp_qtr_span.-1)||'000001';
            emp_hir_pattern_6=repeat('0',&emp_qtr_span.-1)||'0000001';
            emp_hir_pattern_7=repeat('0',&emp_qtr_span.-1)||'00000001';
            emp_hir_pattern_8=repeat('0',&emp_qtr_span.-1)||'000000001';
            * do for all quarters where possible to observe full history and outcome;
            do x=&emp_qtr_start.+&emphist_span. to &emp_qtr_end.-&sep_qtr_span.;
                %let ehist_start=(x+1)-&emp_qtr_start.-&emp_qtr_span.;
                * Separators;
                if (substr(ehist,&ehist_start.,length(emp_sep_pattern)) = emp_sep_pattern) then do;
                    * quarter of separation;
                    sep_qtr=x;
                    * earnings in quarter of separation minus y;
                    array earn_array(%eval(&emphist_span.+1)) earn_m0-earn_m%eval(&emphist_span.);
                    do y=1 to &emphist_span.+1;
                        earn_array(y)=emp_array((x+1)-y);
                    end;
                    output lostjob_phf_sep_sample;
                end;
                * Hirings;
                if  (
                    substr(ehist,&ehist_start.,length(emp_hir_pattern_0)) = emp_hir_pattern_0 or
                    substr(ehist,&ehist_start.,length(emp_hir_pattern_1)) = emp_hir_pattern_1 or
                    substr(ehist,&ehist_start.,length(emp_hir_pattern_2)) = emp_hir_pattern_2 or
                    substr(ehist,&ehist_start.,length(emp_hir_pattern_3)) = emp_hir_pattern_3 or
                    substr(ehist,&ehist_start.,length(emp_hir_pattern_4)) = emp_hir_pattern_4 or
                    substr(ehist,&ehist_start.,length(emp_hir_pattern_5)) = emp_hir_pattern_5 or
                    substr(ehist,&ehist_start.,length(emp_hir_pattern_6)) = emp_hir_pattern_6 or
                    substr(ehist,&ehist_start.,length(emp_hir_pattern_7)) = emp_hir_pattern_7 or
                    substr(ehist,&ehist_start.,length(emp_hir_pattern_8)) = emp_hir_pattern_8
                    ) then do;
                    * provisional quarter of separation;
                    * (implied, many of these will not match to an actual separation);
                    sep_qtr=x;
                    * earnings in quarter of separation plus y;
                    array emp_array_hir(&emp_qtr_start.:&emp_qtr_end.) e&emp_qtr_start.-e&emp_qtr_end.;
                    array earn_array_hir(%eval(&sep_qtr_span.+1)) earn_p0-earn_p&sep_qtr_span.;
                    do y=1 to &sep_qtr_span.+1;
                        earn_array_hir(y)=emp_array_hir((x-1)+y);
                    end;
                    * earnings used for full quarter hires;
                    earn_p%eval(&sep_qtr_span.+1)=e%eval(&emp_qtr_end.+1);
                    earn_p%eval(&sep_qtr_span.+2)=e%eval(&emp_qtr_end.+2);
                    output lostjob_phf_hir_sample;
                end;
            end;
        end;
    run;
    * sort by pik and separation quarter;
    proc sort data=lostjob_phf_sep_sample;
        by pik sep_qtr;
    run;
    proc sort data=lostjob_phf_hir_sample;
        by pik sep_qtr;
    run;
    * aggregate any cases of multiple separations in the same quarter;
    data lostjob_phf_sep
        (keep=pik sep_qtr sep_prevyear seindom seincount earndom earndom_m0 earntot tenuredom);
        length seindom $12 earnjob earntot earndom earndom_m0 5. seincount tenuredom sep_prevyear 3.;
        retain seindom
            seincount
            earndom
            earndom_m0
            earntot
            tenuredom
            ;
        set lostjob_phf_sep_sample;  
        by pik sep_qtr;
        array earn_array(%eval(&emphist_span.+1)) earn_m0-earn_m%eval(&emphist_span.);
        if first.sep_qtr then do;
            seindom=.;
            seincount=0;
            earndom=0;
            earndom_m0=0;
            earntot=0;
            tenuredom=0;
        end;
        * total number of jobs lost in that quarter;
        seincount + 1;
        * Annual earnings from lost job (sum of, cannot be done with arrays);
        earnjob=sum(of earn_m1-earn_m&emp_qtr_span.);
        * Annual earnings from all lost jobs;
        earntot + earnjob;
        * dominant lost job;
        if (earnjob gt earndom) then do;
            seindom = sein;
            earndom = earnjob;
            earndom_m0 = earn_m0;
            tenuredom=&emp_qtr_span.;
            * quarters at job (not including gaps);
            do y=&emp_qtr_span.+2 to &emphist_span.+1;
                if (earn_array(y) > 0) then do;
                    tenuredom+1;
                end;
            end;
        end;
        * identify the year before the separation, used to merge in data;
        * calculation for quarter base to use for determining previous year;
        sep_prevyear=1984+floor((sep_qtr-1)/4);
        if last.sep_qtr then output;
    run;
    * aggregate cases of multiple hires following a separation;
    data lostjob_phf_hir
        (keep=pik sep_qtr seindomhire: seindomfull: seincounthire: earndomhire: earntothire: earndomfull:);
        length seindomhire0-seindomhire&sep_qtr_span. 
               seindomhireb0-seindomhireb&sep_qtr_span. 
               seindomfull0-seindomfull&sep_qtr_span. $12
               seincounthire0-seincounthire&sep_qtr_span.
               earntothire0-earntothire&sep_qtr_span.
               earndomhire0-earndomhire&sep_qtr_span.
               earndomhireb0-earndomhireb&sep_qtr_span.
               earndomfull0-earndomfull&sep_qtr_span. 5.
               ;
        retain seincounthire0-seincounthire&sep_qtr_span.
            seindomhire0-seindomhire&sep_qtr_span.
            seindomhireb0-seindomhireb&sep_qtr_span.
            seindomfull0-seindomfull&sep_qtr_span.
            earntothire0-earntothire&sep_qtr_span.
            earndomhire0-earndomhire&sep_qtr_span.
            earndomhireb0-earndomhireb&sep_qtr_span.
            earndomfull0-earndomfull&sep_qtr_span.
            ;
        set lostjob_phf_hir_sample;     
        by pik sep_qtr;
        * define array of earnings (add 1 so that full quarter hires can be measured);
        array earn_array_hir(%eval(&sep_qtr_span.+3)) earn_p0-earn_p%eval(&sep_qtr_span.+2);
        * define array for hiring outcomes;
        array seincounthire_array(%eval(&sep_qtr_span.+1)) seincounthire0-seincounthire%eval(&sep_qtr_span.);
        array seindomhire_array(%eval(&sep_qtr_span.+1)) seindomhire0-seindomhire%eval(&sep_qtr_span.);
        array seindomhireb_array(%eval(&sep_qtr_span.+1)) seindomhireb0-seindomhireb%eval(&sep_qtr_span.);
        array seindomfull_array(%eval(&sep_qtr_span.+1)) seindomfull0-seindomfull%eval(&sep_qtr_span.);
        array earntothire_array(%eval(&sep_qtr_span.+1)) earntothire0-earntothire%eval(&sep_qtr_span.);
        array earndomhire_array(%eval(&sep_qtr_span.+1)) earndomhire0-earndomhire%eval(&sep_qtr_span.);
        array earndomhireb_array(%eval(&sep_qtr_span.+1)) earndomhireb0-earndomhireb%eval(&sep_qtr_span.);
        array earndomfull_array(%eval(&sep_qtr_span.+1)) earndomfull0-earndomfull%eval(&sep_qtr_span.);
        if first.sep_qtr then do;
            do y=1 to &sep_qtr_span.+1 ;
                seincounthire_array(y)=0;
                seindomhire_array(y)=.;
                seindomhireb_array(y)=.;
                seindomfull_array(y)=.;
                earntothire_array(y)=0;
                earndomhire_array(y)=0;
                earndomhireb_array(y)=0;
                earndomfull_array(y)=0;
            end;
        end;
        * fills in job info for all quarters subsequent to the implied separation;
        do y=1 to &sep_qtr_span.+1 ;
            if (earn_array_hir(y)>0) then do;
                * count of jobs in that quarter;
                seincounthire_array(y)+1;
                * total earnings in that quarter;
                earntothire_array(y)+earn_array_hir(y);
                * determine dominant job, based on in quarter earnings, of any job in that quarter;
                if (earn_array_hir(y)>earndomhire_array(y)) then do;
                    seindomhire_array(y)=sein;
                    earndomhire_array(y)=earn_array_hir(y);
                end;
                * determine dominant job, of jobs continuing to next quarter;
                if ((earn_array_hir(y+1)>0) and (earn_array_hir(y+1)>earndomhireb_array(y))) 
                then do;
                    seindomhireb_array(y)=sein;
                    * use earnings from subsequent quarter, after hired;
                    earndomhireb_array(y)=earn_array_hir(y+1);
                end;
                * determine dominant job, based on 2nd quarter of full quarter job;
                if ((earn_array_hir(y+1)>0) and (earn_array_hir(y+2)>0) 
                          and (earn_array_hir(y+1)>earndomfull_array(y))) 
                then do;
                    seindomfull_array(y)=sein;
                    * use earnings from full quarter, after hired;
                    earndomfull_array(y)=earn_array_hir(y+1);
                end;                
            end;
        end;
        if last.sep_qtr then output;
    run;
        
    * merge job data with cpr by year previous to job loss, restricts sample;        
    data lostjob_phf_sep (drop=geocode);
        length h_stfid $11 h_cty $5 cpr_geolen 3.;
        merge lostjob_phf_sep (in=in_sep) 
              OUTDATA.cpr_&met. (in=in_cpr
                keep=pik huid address_year geocode
                rename=(address_year=sep_prevyear))
              ;
        by pik sep_prevyear;
        * only retain observations that appear in job list and metro area;
        if (in_sep and in_cpr);
        * residence;
        h_stfid=geocode;
        h_cty=substr(geocode,1,5);
        cpr_geolen=length(h_stfid);
        sample_cprlen=0;
        if (cpr_geolen=11) then sample_cprlen=1;
        * only allow one separation per year;
        if (first.sep_prevyear);
        output lostjob_phf_sep;
    run;

    * merge in decennial census info;
    data lostjob_phf_sep;
        length agecen 3.;
        merge lostjob_phf_sep (in=in_sep)
              OUTDATA.census_short_sortpik
              (in=in_cen keep=pik qsex qage qspanx qracex puid)
              ;
        by pik;
        * retain those in jobs;
        if (in_sep);
        * format age;
        agecen = input(qage,3.);
        drop qage;
        * dummy for whether in census (may be multile, see step below);
        sample_cen=0;
        if (in_cen) then sample_cen=1;
    run;

    * merge hiring data into separations data by quarter of separation for each worker;
    data &intermed.lostjob_search_&met.;
        * order variables as desired;
        retain pik sep_qtr sep_prevyear seincount sein: earn:
               ;
        * merge all ehf datasets together;
        merge lostjob_phf_sep (in=in_sep)
              lostjob_phf_hir (in=in_hir)
              ;
        by pik sep_qtr;
        * only retain those who had a separation;
        if (in_sep);
        * may be multiple pik for worker in census, so keep first;
        if (first.sep_qtr);
        * used for summaries;
        one=1;
    run;
        
    * summary;
    proc freq data=&intermed.lostjob_search_&met.;
        title "summary of restrictions for &met.";
        table sample_cprlen*sample_cen;
    run;

    * compute stats of geo match rates;
    proc tabulate data=&intermed.lostjob_search_&met.;
        class cpr_geolen sep_prevyear sep_qtr;
        var one;
        table cpr_geolen='Length CPR address'*sep_prevyear='year previous of separation'*sep_qtr='quarter of separation', 
            n=' '*one='count of jobs'*F=10. / RTS=60.;
    run;

%mend lostjob_run1;
    
    
/* run step 2 --------------------------------------------------------------------------- */
%macro lostjob_run2;

    * create frame to add job histories to;
    data pik_hist (drop=sample_cprlen sample_cen);
        set &intermed.lostjob_search_&met.
        (keep=pik sep_qtr sep_prevyear puid huid h_stfid one
        sample_cprlen sample_cen
        where=((sample_cprlen=1) and (sample_cen=1))
        );
    run;
        
    ************* EARNINGS HISTORY OF SEPARATOR ************;
        
    * collapse separator list to pik records;
    proc summary data=pik_hist nway;
        class pik;
        var one;
        output out=pik_list (drop=_TYPE_ _FREQ_) sum(one)=jobslost;
    run;
        
    * match separator list to ehf (or a national file);
    libname SNAP "&dirsnap./ehf/&st." access=readonly;
    data pik_earn;
        merge pik_list (in=in_jobs keep=pik)
              SNAP.ehf_&st._phf (keep=pik sein e&emp_qtr_start.-e&emp_qtr_end.)
              ;
        by pik;
        if (in_jobs);
    run;

    * convert ehf into annual employer earnings (neccessary for MN);
    proc summary data=pik_earn nway;
        class pik sein;
        var e&emp_qtr_start.-e&emp_qtr_end.;
        output out=pik_earn (drop=_TYPE_ _FREQ_) sum=;
    run;

    * construct earnings variables specific to worker separation;
    data pik_earn;
        length j&emp_qtr_start.-j&emp_qtr_end. 3.;
        set pik_earn;
        * count jobs with earnings in period before each hypothetical separation;
        array emp_array(&emp_qtr_start.:&emp_qtr_end.) e&emp_qtr_start.-e&emp_qtr_end.;
        array job_array(&emp_qtr_start.:&emp_qtr_end.) j&emp_qtr_start.-j&emp_qtr_end.;
        do x=&emp_qtr_start.+&emphist_span. to &emp_qtr_end.-&sep_qtr_span.;
            job_array(x)=0;
            do y=1 to &emphist_span.;
                if (emp_array(x-y)>0) then job_array(x)=1;
            end;
        end;
    run;

    * convert ehf into annual total earnings;
    proc summary data=pik_earn nway;
        class pik;
        var e&emp_qtr_start.-e&emp_qtr_end. j&emp_qtr_start.-j&emp_qtr_end.;
        output out=pik_earn (drop=_TYPE_ _FREQ_) sum=;
    run;

    * produce pik by sep_qtr annual total earnings list;
    data pik_earn 
        (drop=e&emp_qtr_start.-e&emp_qtr_end. j&emp_qtr_start.-j&emp_qtr_end. q m earntotall_m9);
        length earntotall_m1-earntotall_m9 earntotall 
               earnvolall_m1-earnvolall_m8 earnvolall earngrwall 5.;
        set pik_earn;
        * earnings history data;
        array emp_array(&emp_qtr_start.:&emp_qtr_end.) e&emp_qtr_start.-e&emp_qtr_end.;
        * job count history data;
        array job_array(&emp_qtr_start.:&emp_qtr_end.) j&emp_qtr_start.-j&emp_qtr_end.;            
        * earnings history in last quarters before separation;
        array earntotall_array(9) earntotall_m1-earntotall_m9;
        earntotall_array(9)=0;
        * earnings volatility in last quarters before separation;
        array earnvolall_array(8) earnvolall_m1-earnvolall_m8;
        * go through each quarter, starting after two years;        
        do q=(&emp_qtr_start.+8) to &emp_qtr_end.;
            sep_qtr=q;
            * earnings history in last quarters;
            do m=1 to 8;
                * total earnings in each quarter;
                earntotall_array(9-m)=emp_array((q-9)+m);
                if (earntotall_array(9-m)=.) then earntotall_array(9-m)=0;
                * earnings volatility, normalized;
                if (earntotall_array(9-m)=0 and earntotall_array(9-m+1)=0) then earnvolall_array(9-m)=1;
                else earnvolall_array(9-m)=earntotall_array(9-m)/
                                           (0.5*(earntotall_array(9-m)+earntotall_array(9-m+1)));
            end;
            * Annual earnings from those lost jobs;
            earntotall=sum(of earntotall_m1-earntotall_m4);
            * Volatility of earnings from those lost jobs;
            earnvolall=var(of earnvolall_m1-earnvolall_m7);
            earngrwall=mean(of earnvolall_m1-earnvolall_m7);
            * Count of jobs prior to separation;
            jobcount_hist=job_array(q);
            * output total annual earnings for each quarter;
            output;
        end;
    run;
        
    * earnings history back to jobs data;
    data pik_hist;
        * merge all ehf datasets together;
        merge pik_hist (in=in_sep)
              pik_earn (keep=pik sep_qtr earntotall earnvolall earngrwall jobcount_hist)
              ;
        by pik sep_qtr;
        * only retain those who had a separation;
        if (in_sep);
    run;

    ************* CENSUS AND HOUSEHOLD ************;
                
    * summarize by household id;
    proc summary data=pik_hist nway;
        class puid;
        var one;
        output out=hu_census (drop=_TYPE_ _FREQ_) sum=fam_multi;
    run;
    
    * merge household id with census data to spouse, or pikco;
    data hu_census;
        length pikhh pikco $9;
        * retain related piks in same housing unit across records;
        retain pik pikhh pikco
               ;
        * use only records that are tied to a job;
        merge hu_census (in=in_jobs keep=puid)
              OUTDATA.census_short_sortpuid (in=in_cen keep=puid qrel pik);
        by puid;
        if (in_jobs and in_cen);
        * initialize housing unit relation variables;
        if first.puid then do;
            pikhh='';
            pikco='';
        end;
        * designate as a hh, head of household;
        if (qrel='01' and pikhh='') then do;
            pikhh=pik;
        end;
        * designate as a co-hh;
        if (pikco='') then do;
            if (qrel='02' or qrel='19') then do;
                pikco=pik;
            end;
        end;
        * output those that are neither a hh nor a co-hh (but lost job);
        if (not(qrel in ('01','02','19'))) then do;
            pikco='';
            output hu_census;
        end;
        * for all that are heads, wait for housing unit to complete;
        if last.puid then do;
            * output hh, may or may not have pikco retained and output;
            if (pikhh ne "") then do;
                pik=pikhh;
                output hu_census;
            end;
            * output co-hh, consider hh to be of equal status;
            if (pikco ne "") then do;
                pik=pikco;
                pikco=pikhh;
                output hu_census;
            end;
        end;
    run;
    * sort for merge with jobs data;
    * descending by pikco so that if multiple records of person,;
    *  only keep the one with a pikco;
    proc sort data=hu_census;
        by pik puid descending pikco;
    run;
    proc sort data=pik_hist;
        by pik puid;
    run;
    * merge pikco with worker file;
    data pik_hist;
        merge pik_hist (in=in_jobs)
              hu_census (keep=pik puid pikco)
              ;
        by pik puid;
        * only merge into observations that appear in job list;
        * this one sided merge drops some extra households that a pik is in;
        if (in_jobs);
        * only keep the first record of a person in a household, could be dups;
        if (first.puid);
        * if person had multiple piks in household and became own spouse,;
        *  drop the spouse id so that not count person twice in household earn;
        if (pik=pikco) then pikco='';
    run;
            
    * merge spouses residence to separators for verification;
    proc sort data=pik_hist;
        by pikco sep_prevyear;
    run;
    data pik_hist (drop=geocodeco huidco h_stfidco);
        length h_stfidco $11;
        merge pik_hist (in=in_jobs) 
              OUTDATA.cpr_&met. (keep=pik address_year huid geocode
              rename=(pik=pikco address_year=sep_prevyear huid=huidco geocode=geocodeco))
              ;
        by pikco sep_prevyear;
        * only merge into observations that appear in job list;
        if (in_jobs);
        * verify households, designate as married;
        h_stfidco=substr(geocodeco,1,11);
        married=0;
        if (pikco ne '') then do;
            if ((huid ne '') and (huid=huidco)) then married=1;
            if (sep_prevyear=2001 and h_stfid=h_stfidco) then married=1;
        end;
    run;
        
            
            
    ************* EARNINGS HISTORY OF SPOUSE ************;            
            
    * collapse separator list to pikco records;
    proc summary data=pik_hist (where=(pikco ne '')) nway;
        class pikco;
        var one;
        output out=pik_list (drop=_TYPE_ _FREQ_) sum(one)=jobslost;
    run;
        
    * match separator list to ehf (or a national file);
    libname SNAP "&dirsnap./ehf/&st." access=readonly;
    data pik_earn;
        merge pik_list (in=in_jobs keep=pikco)
              SNAP.ehf_&st._phf
              (keep=pik sein e&emp_qtr_start.-e&emp_qtr_end. rename=(pik=pikco))
              ;
        by pikco;
        if (in_jobs);
    run;
            
    * convert ehf into annual total earnings;
    proc summary data=pik_earn nway;
        class pikco;
        var e&emp_qtr_start.-e&emp_qtr_end.;
        output out=pik_earn (drop=_TYPE_ _FREQ_) sum=;
    run;
            
    * produce pik by sep_qtr annual total earnings list;
    data pik_earn
        (drop=e&emp_qtr_start.-e&emp_qtr_end. q m earntotall_m9);
        length earntotall_m1-earntotall_m9 earntotall 5.;
        set pik_earn;
        * earnings history data;
        array emp_array(&emp_qtr_start.:&emp_qtr_end.) e&emp_qtr_start.-e&emp_qtr_end.;
        * earnings history in last quarters before separation;
        array earntotall_array(9) earntotall_m1-earntotall_m9;
        earntotall_array(9)=0;
        * go through each quarter, starting after two years;        
        do q=(&emp_qtr_start.+8) to &emp_qtr_end.;
            sep_qtr=q;
            * earnings history in last quarters;
            do m=1 to 8;
                * total earnings in each quarter;
                earntotall_array(9-m)=emp_array((q-9)+m);
                if (earntotall_array(9-m)=.) then earntotall_array(9-m)=0;
            end;
            * Annual earnings from those lost jobs;
            earntotall=sum(of earntotall_m1-earntotall_m4);
            * output total annual earnings for each quarter;
            output;
        end;
    run;        

    * merge spouse earnings history back to jobs data;
    proc sort data=pik_hist;
        by pikco sep_qtr;
    run;
    data pik_hist;
        * merge all ehf datasets together;
        merge pik_hist (in=in_jobs)
              pik_earn (keep=pikco sep_qtr earntotall
                            rename=(earntotall=earntotallco))
              ;
        by pikco sep_qtr;
        * only retain those who had a separation;
        if (in_jobs);
        * set missing to zero;
        if (earntotallco=.) then earntotallco=0;
        * create total household earnings later (may only keep if married=1);
    run;




    ************* EARNINGS HISTORY OF HOUSEHOLD ************;

    * total household earnings and members is used for mode choice estimation;
    * this total can include spouse;
    * list of household members, exclude separated worker and take first nine;
    * wait on those with 2001 CPR, because no huid;
    proc sort data=pik_hist
            (keep=sep_prevyear huid pik sep_qtr
             where=((sep_prevyear ne 2001) and (huid ne '')))
             out=hulist;
        by sep_prevyear huid pik sep_qtr;
    run;
    proc sort data=OUTDATA.cpr_&met. 
              (keep=pik huid address_year
               rename=(address_year=sep_prevyear pik=pik_mem)
               where=((huid ne '') and (pik_mem ne '')))
              out=cprhuid;
        by sep_prevyear huid pik_mem;
    run;       
    * create cartesian product of housing units and people;
    proc sql nowarn;
        create table hupiklist as
            select * 
            from hulist, cprhuid
            where hulist.sep_prevyear=cprhuid.sep_prevyear
              and hulist.huid=cprhuid.huid
        ;
    quit;    
    * for those with a 2001 CPR, use decennial census to get household list;
    proc sort data=pik_hist 
            (keep=sep_prevyear puid pik sep_qtr h_stfid
             where=((sep_prevyear=2001) and (puid ne '')))
             out=hulist;
        by puid pik sep_qtr;
    run;
    data cenpuid;
        set OUTDATA.census_short_sortpuid
        (keep=pik puid rename=(pik=pik_mem) 
        where=((puid ne '') and (pik_mem ne '')));
    run;
    * create cartesian product of 2000 housing units and 2001 people ;
    proc sql nowarn;
        create table hupiklist2001 as
            select *
            from hulist as hulist, cenpuid as cenpuid
            where hulist.puid=cenpuid.puid
        ;
    quit;
    proc sort data=hupiklist2001;
        by pik_mem;
    run;
    data hupiklist2001 (drop=address_year geocode h_stfid_mem);
        merge hupiklist2001 (in=in_hu)
              OUTDATA.cpr_&met.
              (keep=pik address_year geocode rename=(pik=pik_mem)
               where=((address_year=2001) and (pik_mem ne '')))
              ;
        by pik_mem;
        if (in_hu);
        * verify that 2000 household member is still in 2001 hu with worker;
        h_stfid_mem=substr(geocode,1,11);
        if (h_stfid=h_stfid_mem);
    run;
    * consolidate two household lists;
    data hupiklist;
        set hupiklist
            (keep=pik sep_prevyear sep_qtr pik_mem)
            hupiklist2001
            (keep=pik sep_prevyear sep_qtr pik_mem)
            ;
        one=1;
    run;
    * count up size of household for summary;
    proc summary data=hupiklist nway;
        class pik sep_prevyear sep_qtr;
        var one;
        output out=hucount (drop=_type_ _freq_) sum(one)=hucount;
    run;
    proc freq data=hucount;
        title 'distribution of household sizes, by previous year';
        table hucount*sep_prevyear;
    run;
        
    * sort to by household/quarter before size limit of household;
    * do not include worker because earnings history already calculated;
    proc sort data=hupiklist (where=(pik ne pik_mem));
        by pik sep_qtr pik_mem;
    run;
    * retain only first 9 other individuals;
    data hupiklist;
        retain hucount;
        set hupiklist;
        by pik sep_qtr;
        if (first.sep_qtr) then hucount=0;
        hucount+1;
        if (hucount<10) then output;
    run;
    * merge in earnings of household members;
    proc summary data=hupiklist nway;
        class pik_mem;
        var hucount;
        output out=pik_earn (drop=_TYPE_ _FREQ_) n(hucount)=hucountnum;
    run;
    * match separator list to ehf (or a national file);
    libname SNAP "&dirsnap./ehf/&st." access=readonly;
    data pik_earn;
        merge pik_earn (in=in_jobs keep=pik_mem)
              SNAP.ehf_&st._phf
              (keep=pik sein e&emp_qtr_start.-e&emp_qtr_end. rename=(pik=pik_mem))
              ;
        by pik_mem;
        if (in_jobs);
    run;
            
    * convert ehf into annual total earnings;
    proc summary data=pik_earn nway;
        class pik_mem;
        var e&emp_qtr_start.-e&emp_qtr_end.;
        output out=pik_earn (drop=_TYPE_ _FREQ_) sum=;
    run;
                
    * produce pik by sep_qtr annual total earnings list;
    data pik_earn
        (drop=e&emp_qtr_start.-e&emp_qtr_end. q m earntotall_m9);
        length earntotall_m1-earntotall_m9 earntotall 5.;
        set pik_earn;
        * earnings history data;
        array emp_array(&emp_qtr_start.:&emp_qtr_end.) e&emp_qtr_start.-e&emp_qtr_end.;
        * earnings history in last quarters before separation;
        array earntotall_array(9) earntotall_m1-earntotall_m9;
        earntotall_array(9)=0;
        * go through each quarter, starting after two years;        
        do q=(&emp_qtr_start.+8) to &emp_qtr_end.;
            sep_qtr=q;
            * earnings history in last quarters;
            do m=1 to 8;
                * total earnings in each quarter;
                earntotall_array(9-m)=emp_array((q-9)+m);
                if (earntotall_array(9-m)=.) then earntotall_array(9-m)=0;
            end;
            * Annual earnings from those lost jobs;
            earntotall=sum(of earntotall_m1-earntotall_m4);
            * output total annual earnings for each quarter;
            output;
        end;
    run;                
    * merge to household list;
    proc sort data=hupiklist;
        by pik_mem sep_qtr;
    run;
    data hupiklist;
        merge hupiklist (in=in_jobs)
              pik_earn (keep=pik_mem sep_qtr earntotall)
              ;
        by pik_mem sep_qtr;
        * only retain those in households with a separation;
        if (in_jobs);
        * identify whether household member has been employed;
        earner=0;
        if (earntotall>0) then earner=1;
    run;            
    * summarize by household across pik_mem;
    proc summary data=hupiklist nway;
        class pik sep_qtr;
        var earntotall;
        output out=hupiklist (drop=_TYPE_ _FREQ_)
            sum(earntotall)=huearn sum(earner)=huearners n(earner)=pikcnt;
    run;
    * merge back to pik;
    proc sort data=pik_hist;
        by pik sep_qtr;
    run;
    data &intermed.lostjob_pik_hist_&met.;
        merge pik_hist (in=in_jobs)
              hupiklist
              ;
        by pik sep_qtr;
        if (in_jobs);
        * prepare variables for estimation (avoid divide by zero by add worker);
        huearn=max(0,huearn)+earntotall;
        huearners=max(0,huearners)+1;
        pikcnt=max(0,pikcnt)+1;
    run;

                
    ************* SUMMARY ************;
                
    * compute some more summary stats;
    proc freq data=&intermed.lostjob_pik_hist_&met.;
        title 'household variables';
        table married pikcnt huearners;
    run;
        
     
%mend lostjob_run2;

/* run step 3 --------------------------------------------------------------------------- */
%macro lostjob_run3;
               
    * create frame to add employer information to;
    data pik_sein (drop=sample_cprlen sample_cen);
        set &intermed.lostjob_search_&met.
        (keep=pik sep_qtr sep_prevyear seindom one sample_cprlen sample_cen h_stfid
         where=((sample_cprlen=1) and (sample_cen=1)));
    run;
    proc sort data=pik_sein;
        by pik seindom;
    run;
                
    * add seinunit from phf_b to worker record;
    * may drop multiple separations from same employer;
    libname SNAP "&dirsnap./qwicc/&st." access=readonly;
    * merge job separators to employer seinunit data;
    data lostjob_su (drop=e&sep_qtr_start.-e&sep_qtr_end. seinunit1);
        * retain only records with full home geography and census data;
        retain seinunit;
        length seinunit $5;
        merge pik_sein
              (in=in_jobs)
              SNAP.phf_&st._b
              (keep=pik sein seinunit1 e&sep_qtr_start.-e&sep_qtr_end. rename=(sein=seindom))
              ;
        * this should be a one to one merge;
        by pik seindom;
        * must be a separator;
        if (in_jobs);
        if (first.seindom) then seinunit='00000';
        * only keep spells with earnings in quarter of separation;
        array emp_array(&sep_qtr_start.:&sep_qtr_end.) e&sep_qtr_start.-e&sep_qtr_end.;
        if (emp_array(sep_qtr) ne .) then seinunit=seinunit1;
        * convert sep_qtr into year and quarter;
        year=floor((sep_qtr-1)/4)+1985;
        quarter=sep_qtr-(year-1985)*4;
        * only keep one of thse;
        if (last.seindom) then output lostjob_su;
    run;

    * merge seinunits to ecf and extract relevant fields for quarter of separation;
    proc sort data=lostjob_su;
        by seindom year quarter seinunit;
    run;
    libname SNAP "&dirsnap./ecf/&st." access=readonly;
    data lostjob_su;
        length leg_geocode_su $11 leg_geo_qual_su 3.;
        merge lostjob_su(in=in_su)
              SNAP.ecf_&st._seinunit
              (keep=sein seinunit leg_geocode leg_geo_qual year quarter
               rename=(sein=seindom))
              ;
        by seindom year quarter seinunit;
        * must be a separator;
        if (in_su);
        * initial assignment;
        leg_geocode_su=leg_geocode;
        leg_geo_qual_su=leg_geo_qual;
    run;
    * later on, merge on travel time or distance to previous job;
                
    * sort separation data by former employer, for merging;
    proc sort data=lostjob_su(drop=year quarter leg_geocode leg_geo_qual)
        out=lostjob_sein;
        by seindom sep_qtr;
    run;
   
    * get employer size data;
    libname SNAP "&dirsnap./ecf/&st." access=readonly;
    data ecf_qtrs (drop=ecf_data_qtr x seinsize_b);
        length seinsize_q%eval(&ecf_qtr_start.)-seinsize_q%eval(&ecf_qtr_end.) 5.;
        retain seinsize_q: ;
        set SNAP.ecf_&st._sein (where=(
                                (year-1985)*4+quarter>=&ecf_qtr_start.
                                and
                                (year-1985)*4+quarter<=&ecf_qtr_end.
                                )
                                keep = yr_qtr year quarter sein ui_seinsize_b 
                                        mode_es_naics_fnl2007_emp
                                        mode_leg_county_emp
                                rename=(sein=seindom ui_seinsize_b=seinsize_b
                                        mode_es_naics_fnl2007_emp=sein_naics6
                                        mode_leg_county_emp=sein_mode_cty
                                       )
                                );
        by seindom;
        * fill in available data for full range of eligible quarters;
        array seinsize_array(&ecf_qtrs.) seinsize_q:;        
        if first.seindom then do;
            do x=1 to &ecf_qtrs. ;
                seinsize_array(x)=0;
            end;
        end;
        ecf_data_qtr=(year-1985)*4+quarter;
        do x=1 to &ecf_qtrs. ;
            if (ecf_data_qtr=&ecf_qtr_start.-1+x) then do;
                seinsize_array(x)=seinsize_b;
            end;
        end;
        if last.seindom then output;
    run;

    * get successor predecessor data;
    libname SNAP "&dirsnap./spf/&st." access=readonly;
    data spf_qtrs (drop=spf_data_qtr x link_ui succ_link_ui);
        length pred_link_ui_q%eval(&ecf_qtr_start.)-pred_link_ui_q%eval(&ecf_qtr_end.)
               succ_link_ui_q%eval(&ecf_qtr_start.)-succ_link_ui_q%eval(&ecf_qtr_end.)
               3.;
        retain pred_link_ui_q: succ_link_ui_q:;
        set SNAP.spf_&st. (where=(
                                qtime>=&ecf_qtr_start.
                                and
                                qtime<=&ecf_qtr_end.
                                )
                                keep = qtime sein link_ui succ_link_ui
                                rename=(sein=seindom)
                                );
        by seindom;
        * fill in available data for full range of eligible quarters;
        array pred_array(&ecf_qtrs.) pred_link_ui_q:;        
        array succ_array(&ecf_qtrs.) succ_link_ui_q:;        
        if first.seindom then do;
            do x=1 to &ecf_qtrs. ;
                pred_array(x)=.;
                succ_array(x)=.;
            end;
        end;
        spf_data_qtr=qtime;
        do x=1 to &ecf_qtrs. ;
            if (spf_data_qtr=&ecf_qtr_start.-1+x) then do;
                * keep first if it is a 1, firm dies, and second, 4, if it survives (just in case);
                if (link_ui=1) then pred_array(x)=link_ui;
                if (link_ui=4 and pred_array(x)=.) then pred_array(x)=link_ui;
                * keep first if it is a 1, firm is born, and second, 4, if it continues (just in case);
                if (succ_link_ui=1) then succ_array(x)=succ_link_ui;
                if (succ_link_ui=4 and succ_array(x)=.) then succ_array(x)=succ_link_ui;
            end;
        end;        
        if last.seindom then output;
    run;

    * merge employer size data with separation data;
    data lostjob_sein (drop=seinsize_q: pred_link_ui_q: succ_link_ui_q: y);
        * define variables for before and after separation;
        length seinsize_m3 seinsize_m2 seinsize_m1 seinsize_m0 seinsize_p1 seinsize_p2 seinsize_p3 5.
               pred_link_m3 pred_link_m2 pred_link_m1 pred_link_m0 pred_link_p1 pred_link_p2 pred_link_p3
               succ_link_m3 succ_link_m2 succ_link_m1 succ_link_m0 succ_link_p1 succ_link_p2 succ_link_p3
               3.;
        * merge separators with ecf and spf data;
        merge lostjob_sein (in=in_jobs)
              ecf_qtrs (drop=yr_qtr year quarter)
              spf_qtrs (drop=qtime)
              ;
        if (in_jobs);
        by seindom;
        * full range of available quarters;
        array seinsize_array(&ecf_qtrs.) seinsize_q:;
        array pred_array(&ecf_qtrs.) pred_link_ui_q:;
        array succ_array(&ecf_qtrs.) succ_link_ui_q:;
        * quarters before and after separation;
        array sep_seinsize_array(7) seinsize_m3--seinsize_m0 seinsize_p1--seinsize_p3;
        array sep_pred_array(7) pred_link_m3--pred_link_m0 pred_link_p1--pred_link_p3;
        array sep_succ_array(7) succ_link_m3--succ_link_m0 succ_link_p1--succ_link_p3;
        * assign value to quarters relevant to each sein of pik with separation;
        do y=1 to 7 ;
            sep_seinsize_array(y) =seinsize_array(sep_qtr-&ecf_qtr_start.-3+y);
            sep_pred_array(y) =pred_array(sep_qtr-&ecf_qtr_start.-3+y);
            sep_succ_array(y) =succ_array(sep_qtr-&ecf_qtr_start.-3+y);
        end;
    run;

    * identify sample of displaced workers;
    data lostjob_sein (drop=q);
        length pred_link_m3_sp pred_link_m2_sp pred_link_m1_sp pred_link_m0_sp pred_link_p1_sp pred_link_p2_sp pred_link_p3_sp
               succ_link_m3_sp succ_link_m2_sp succ_link_m1_sp succ_link_m0_sp succ_link_p1_sp succ_link_p2_sp succ_link_p3_sp
               3.;
        set lostjob_sein;
        * arrays for successor predecessor quarters;
        array array_pred(7) pred_link_m3 pred_link_m2 pred_link_m1 pred_link_m0 pred_link_p1 pred_link_p2 pred_link_p3;
        array array_succ(7) succ_link_m3 succ_link_m2 succ_link_m1 succ_link_m0 succ_link_p1 succ_link_p2 succ_link_p3;
        array array_pred_sp(7) pred_link_m3_sp pred_link_m2_sp pred_link_m1_sp pred_link_m0_sp pred_link_p1_sp pred_link_p2_sp pred_link_p3_sp;
        array array_succ_sp(7) succ_link_m3_sp succ_link_m2_sp succ_link_m1_sp succ_link_m0_sp succ_link_p1_sp succ_link_p2_sp succ_link_p3_sp;
        * calculate proportional change in employment;
        seinsize_m3_yr = seinsize_p1/seinsize_m3;
        seinsize_m2_yr = seinsize_p2/seinsize_m2;
        seinsize_m1_yr = seinsize_p3/seinsize_m1;
        * successor predecessor events;
        do q=1 to 7;
            * identify those that die and 80 percent move to another firm;
            if (array_pred(q)=1 or array_pred(q)=4) then array_pred_sp(q)=0;
            else array_pred_sp(q)=1;
            * identify those that have 80 percent come from a predecessor (all the same ones as above);
            if (array_succ(q)=1 or array_succ(q)=4) then array_succ_sp(q)=0;
            else array_succ_sp(q)=1;
        end;
        * employment size >=25 and drop;
        if (seinsize_m3 ge 25 and seinsize_m3_yr <.7) then seinsize_m3_0025_drop30=1;
        else seinsize_m3_0025_drop30=0;
        if (seinsize_m2 ge 25 and seinsize_m2_yr <.7) then seinsize_m2_0025_drop30=1;
        else seinsize_m2_0025_drop30=0;
        if (seinsize_m1 ge 25 and seinsize_m1_yr <.7) then seinsize_m1_0025_drop30=1;
        else seinsize_m1_0025_drop30=0;
        * how many separated workers were in large enough firms?;
        seinsize_0025=0;
        if (seinsize_m3 ge 25 or seinsize_m2 ge 25 or seinsize_m1 ge 25) then seinsize_0025=1;
        * how many of the separated workers that were in large enough firms had displacements?;
        seinsize_0025_drop30=0;
        if (seinsize_m3_0025_drop30=1 or seinsize_m2_0025_drop30=1 or seinsize_m1_0025_drop30=1) then seinsize_0025_drop30=1;
    
        * displacement;
        seinsize_m3_0025_drop30_nosp =seinsize_m3_0025_drop30*pred_link_m3_sp*pred_link_m2_sp*pred_link_m1_sp*pred_link_m0_sp*succ_link_m3_sp*succ_link_m2_sp*succ_link_m1_sp*succ_link_m0_sp;
        seinsize_m2_0025_drop30_nosp =seinsize_m2_0025_drop30*pred_link_m2_sp*pred_link_m1_sp*pred_link_m0_sp*pred_link_p1_sp*succ_link_m2_sp*succ_link_m1_sp*succ_link_m0_sp*succ_link_p1_sp;
        seinsize_m1_0025_drop30_nosp =seinsize_m1_0025_drop30*pred_link_m1_sp*pred_link_m0_sp*pred_link_p1_sp*pred_link_p2_sp*succ_link_m1_sp*succ_link_m0_sp*succ_link_p1_sp*succ_link_p2_sp;
        * displacement any time;
        displaced_ever_nosp_25=0;
        if (seinsize_m3_0025_drop30_nosp=1 or seinsize_m2_0025_drop30_nosp=1 or seinsize_m1_0025_drop30_nosp=1) then displaced_ever_nosp_25=1;

        * alternate displacement;
        seinsize_m3_0025_drop30_nosp0 =seinsize_m3_0025_drop30*pred_link_m0_sp*succ_link_m0_sp;
        seinsize_m2_0025_drop30_nosp0 =seinsize_m2_0025_drop30*pred_link_m0_sp*succ_link_m0_sp;
        seinsize_m1_0025_drop30_nosp0 =seinsize_m1_0025_drop30*pred_link_m0_sp*succ_link_m0_sp;
        * displacement any time;
        displaced_ever_nosp0_25=0;
        if (seinsize_m3_0025_drop30_nosp0=1 or seinsize_m2_0025_drop30_nosp0=1 or seinsize_m1_0025_drop30_nosp0=1) then displaced_ever_nosp0_25=1;

        * test firm size >= 50
        * employment size and drop;
        if (seinsize_m3 ge 50 and seinsize_m3_yr <.7) then seinsize_m3_0050_drop30=1;
        else seinsize_m3_0050_drop30=0;
        if (seinsize_m2 ge 50 and seinsize_m2_yr <.7) then seinsize_m2_0050_drop30=1;
        else seinsize_m2_0050_drop30=0;
        if (seinsize_m1 ge 50 and seinsize_m1_yr <.7) then seinsize_m1_0050_drop30=1;
        else seinsize_m1_0050_drop30=0;
        * how many separated workers were in large enough firms?;
        seinsize_0050=0;
        if (seinsize_m3 ge 50 or seinsize_m2 ge 50 or seinsize_m1 ge 50) then seinsize_0050=1;
        * how many of the separated workers that were in large enough firms had displacements?;
        seinsize_0050_drop30=0;
        if (seinsize_m3_0050_drop30=1 or seinsize_m2_0050_drop30=1 or seinsize_m1_0050_drop30=1) then seinsize_0050_drop30=1;     
        * true displacement;
        seinsize_m3_0050_drop30_nosp =seinsize_m3_0050_drop30*pred_link_m3_sp*pred_link_m2_sp*pred_link_m1_sp*pred_link_m0_sp*succ_link_m3_sp*succ_link_m2_sp*succ_link_m1_sp*succ_link_m0_sp;
        seinsize_m2_0050_drop30_nosp =seinsize_m2_0050_drop30*pred_link_m2_sp*pred_link_m1_sp*pred_link_m0_sp*pred_link_p1_sp*succ_link_m2_sp*succ_link_m1_sp*succ_link_m0_sp*succ_link_p1_sp;
        seinsize_m1_0050_drop30_nosp =seinsize_m1_0050_drop30*pred_link_m1_sp*pred_link_m0_sp*pred_link_p1_sp*pred_link_p2_sp*succ_link_m1_sp*succ_link_m0_sp*succ_link_p1_sp*succ_link_p2_sp;
        * displacement any time;
        displaced_ever_nosp_50=0;
        if (seinsize_m3_0050_drop30_nosp=1 or seinsize_m2_0050_drop30_nosp=1 or seinsize_m1_0050_drop30_nosp=1) then displaced_ever_nosp_50=1;
        * true displacement;
        seinsize_m3_0050_drop30_nosp0 =seinsize_m3_0050_drop30*pred_link_m0_sp*succ_link_m0_sp;
        seinsize_m2_0050_drop30_nosp0 =seinsize_m2_0050_drop30*pred_link_m0_sp*succ_link_m0_sp;
        seinsize_m1_0050_drop30_nosp0 =seinsize_m1_0050_drop30*pred_link_m0_sp*succ_link_m0_sp;
        * displacement any time;
        displaced_ever_nosp0_50=0;
        if (seinsize_m3_0050_drop30_nosp0=1 or seinsize_m2_0050_drop30_nosp0=1 or seinsize_m1_0050_drop30_nosp0=1) then displaced_ever_nosp0_50=1;    
    run;

    * sort data for merging on travel time for separated job;
    proc sort data=lostjob_sein;
        by leg_geocode_su h_stfid;
    run;
    * merge with travel time data for previous job;
    data pik_time;
        merge lostjob_sein (in=in_jobs)
              OUTDATA.proxmeas_&met. (in=in_prox
                keep=o_stfid d_stfid dist auto tran
                rename=(o_stfid=h_stfid d_stfid=leg_geocode_su
                        dist=sep_dist auto=sep_auto tran=sep_tran))
              ;
        by leg_geocode_su h_stfid;
        if (in_jobs);
        * identify separators with available travel time to previous job;
        sep_prox=9;
        if (in_prox) then sep_prox=1;
    run;
                
    * for those with no prox time, use employer mode county;
    data prox_county;
        length d_cty $3;
        set OUTDATA.proxmeas_&met.;
        d_cty=substr(d_stfid,3,3);
    run;        
    proc summary data=prox_county;
        class d_cty o_stfid;
        var dist auto tran;
        output out=prox_county (drop=_FREQ_) mean=;
    run;
    * for jobs with a work county, apply average for tract to county (in state);
    proc sort data=pik_time;
        by sein_mode_cty h_stfid;
    run;
    data pik_time
        (drop=sep_dist_cty sep_auto_cty sep_tran_cty _TYPE_);
        merge pik_time (in=in_jobs)
              prox_county (in=in_prox
                keep=o_stfid d_cty dist auto tran _TYPE_
                where=(_TYPE_=3)
                rename=(o_stfid=h_stfid d_cty=sein_mode_cty
                        dist=sep_dist_cty auto=sep_auto_cty tran=sep_tran_cty))
              ;
        by sein_mode_cty h_stfid;
        if (in_jobs);
        * identify separators with available travel time to previous job;
        if ((sep_prox=9) and (in_prox)) then do;
            sep_prox=2;
            sep_dist=sep_dist_cty;
            sep_auto=sep_auto_cty;
            sep_tran=sep_tran_cty;
        end;
    run;
    * for jobs with no work county, apply average for tract to within metro area;
    proc sort data=pik_time;
        by h_stfid;
    run;
    data &intermed.lostjob_sein_&met.
        (drop=sep_dist_any sep_auto_any sep_tran_any _TYPE_
        pred: succ: seinsize_m: seinsize_p:);
        merge pik_time (in=in_jobs)
              prox_county (in=in_prox
                keep=o_stfid dist auto tran _TYPE_
                where=(_TYPE_=1)
                rename=(o_stfid=h_stfid
                        dist=sep_dist_any auto=sep_auto_any tran=sep_tran_any))
              ;
        by h_stfid;
        if (in_jobs);
        * identify separators with available travel time to previous job;
        if ((sep_prox=9) and (in_prox)) then do;
            sep_prox=3;
            sep_dist=sep_dist_any;
            sep_auto=sep_auto_any;
            sep_tran=sep_tran_any;
        end;
    run;                
    proc freq data=&intermed.lostjob_sein_&met.;
        title 'travel time to prev job 1=trt, 2=cty, 3=any';
        table sep_prox;
    run;
    proc freq data=&intermed.lostjob_sein_&met.;
        title 'progressive displacement sample restrictions';
        table seinsize_0025*seinsize_0025_drop30*displaced_ever_nosp_25;
    run;
    proc freq data=&intermed.lostjob_sein_&met.;
        title 'progressive displacement sample restrictions, nosp in disp qtr';
        table seinsize_0025*seinsize_0025_drop30*displaced_ever_nosp0_25;
    run;
    proc freq data=&intermed.lostjob_sein_&met.;
        title 'test: 50 progressive displacement sample restrictions';
        table seinsize_0050*seinsize_0050_drop30*displaced_ever_nosp_50;
    run;
    proc freq data=&intermed.lostjob_sein_&met.;
        title 'progressive displacement sample restrictions, nosp in disp qtr';
        table seinsize_0050*seinsize_0050_drop30*displaced_ever_nosp0_50;
    run;

    * sort again to pik by job level;
    * (drop=pred: succ: seinsize_m: seinsize_p:);
    proc sort data=&intermed.lostjob_sein_&met.;
        by pik sep_qtr;
    run;
        
%mend lostjob_run3;


/* run step 4 --------------------------------------------------------------------------- */
%macro lostjob_run4;
                
    %let metord = 1;
    * cycle through each city, choosing the appropriate state, state codes, and counties;
    %do %until("%scan(&metrolist.,&metord.)"="");
        %let met=%scan(&metrolist.,&metord.);
        %let st=%scan(&stlist.,&metord.);
        %let stmet=%scan(&stmetrolist.,&metord.);
        %let stcode=%scan(&stcodelist.,&metord.);
        %let cty=%scan(%quote(&countylist.),&metord.,V);
        %put  &met. &st. &cty.;

        * merge files of each metro area;
        data lostjob_met;
            merge &intermed.lostjob_search_&met. (in=in_sep)
                  &intermed.lostjob_pik_hist_&met. (in=in_hist)
                  &intermed.lostjob_sein_&met. (in=in_sein)
                  ;
            by pik sep_qtr;
            if (in_sep);
            * identify metro area;
            stmet_name="&stmet.";
            * focus on prime working age population;
            * calculate age in year of separation (not same as census);
            age=agecen+(sep_prevyear+1-2000);
            keep_age=0;
            if (age ge &agemin. and age le &agemax.) then keep_age=1;
            * focus on lower earnings workers;
            keep_earntotall=0;
            if (earntotall ge 15000 and earntotall le 40000) then keep_earntotall=1;
            * earnings lost must be large share of earnings;
            keep_earndom=0;
            if ((earndom/earntotall)>=0.5) then keep_earndom=1;
            * some PIKs are repeated across workers because of multiple ssn use;
            keep_count=0;
            if (seincount lt 3) then keep_count=1;
            * maximum number of jobs in recent history;
            keep_counthist=0;
            if (jobcount_hist le 40) then keep_counthist=1;
            * other flags;
            has_hist=0;
            if (in_hist) then has_hist=1;
            has_sein=0;
            if (in_sein) then has_sein=1;
        run;

        * add tract level data, access data added later;
        proc sort data=lostjob_met;
            by h_stfid;
        run;
        data lostjob_met;
            merge lostjob_met (in=in_sep)
                  OUTDATA.census2000_&met. (in=in_trt)
                  OTMEXT.xwalk_tract_puma (in=in_puma rename=(stfid=h_stfid))
                  ;
            by h_stfid;
            if (in_sep);
            * identify records with a match;
            sample_trt=0;
            if (in_trt) then do;
                sample_trt=1;
            end;
            sample_puma=0;
            if (in_puma) then do;
                sample_puma=1;
            end;
            * sample disqualification, alternate;
            sample_c=0;
            if ((sample_c=0) and (sample_cprlen ne 1)) then sample_c=1;
            if ((sample_c=0) and (sample_trt ne 1)) then sample_c=2;
            *if ((sample_c=0) and (sample_acc ne 1)) then sample_c=3;
            if ((sample_c=0) and (sample_puma ne 1)) then sample_c=4;
            if ((sample_c=0) and (sample_cen ne 1)) then sample_c=5;
            if ((sample_c=0) and (keep_age ne 1)) then sample_c=6;
            if ((sample_c=0) and (keep_earntotall ne 1)) then sample_c=7;
            if ((sample_c=0) and (keep_earndom ne 1)) then sample_c=8;
            if ((sample_c=0) and (has_sein ne 1)) then sample_c=9;
            if ((sample_c=0) and (seinsize_0025 ne 1)) then sample_c=10;
            if ((sample_c=0) and ((seinsize_0025_drop30 ne 1) and (substr(pik,1,1)>='2'))) then sample_c=11;
            if ((sample_c=0) and ((displaced_ever_nosp_25 ne 1) and (substr(pik,1,1)>='2'))) then sample_c=12;
            * traditional measure;
            sample_c_disp=0;
            if ((sample_c_disp=0) and (sample_cprlen ne 1)) then sample_c_disp=1;
            if ((sample_c_disp=0) and (sample_trt ne 1)) then sample_c_disp=2;
            *if ((sample_c_disp=0) and (sample_acc ne 1)) then sample_c_disp=3;
            if ((sample_c_disp=0) and (sample_puma ne 1)) then sample_c_disp=4;
            if ((sample_c_disp=0) and (sample_cen ne 1)) then sample_c_disp=5;
            if ((sample_c_disp=0) and (keep_age ne 1)) then sample_c_disp=6;
            if ((sample_c_disp=0) and (keep_earntotall ne 1)) then sample_c_disp=7;
            if ((sample_c_disp=0) and (keep_earndom ne 1)) then sample_c_disp=8;
            if ((sample_c_disp=0) and (has_sein ne 1)) then sample_c_disp=9;
            if ((sample_c_disp=0) and (seinsize_0025 ne 1)) then sample_c_disp=10;
            if ((sample_c_disp=0) and (seinsize_0025_drop30 ne 1)) then sample_c_disp=11;
            if ((sample_c_disp=0) and (displaced_ever_nosp_25 ne 1)) then sample_c_disp=12;
        run;

        * append to national file;
        proc append base=lostjob_national data=lostjob_met;
        run;

        %let metord=%eval(&metord+1);
    %end;
    
    proc sort data=lostjob_national;
        by pik sep_qtr h_stfid;
    run;
                    
    proc freq data=lostjob_national;
        title 'sample selection';
        table sample_c /missing;
    run;
    proc freq data=lostjob_national (where=(substr(pik,1,1)<'2'));
        title 'sample selection, all job separators';
        table sample_c /missing;
    run;
    proc freq data=lostjob_national;
        title 'Table D1: sample selection, only those elegible for displacement';
        table sample_c_disp /missing;
    run;
            
    proc format;
        value $industry
    	'11','21','22','23','31'-'33','42','48'-'49'=1
        '44'-'45','56','71','72','81'=2
    	'51','52','53','54','55'=3
        '61','92'=4
        '62'=5
        ;
    run;
            
    data OUTDATA.lostjob_national
        (drop=sample_c
              sample_cprlen sample_trt sample_puma
              sample_cen keep_age
              keep_earntotall keep_earndom keep_count keep_counthist
              /*seinsize_0025 seinsize_0025_drop30 displaced_ever_nosp_25*/);
        set lostjob_national (where=(sample_c=0));
        * year;
        sep_year=1985+floor((sep_qtr-1)/4);
        * season;
        sep_seas=sep_qtr-4*floor((sep_qtr-1)/4);
        * create variables, especially any needed for access prediction;
        female=0;
        if (qsex=2) then do;
            female=1;
        end;
        grp_hisp=0;
        if (qspanx>1) then do;
            grp_hisp=1;
        end;
        grp_white=0;
        if (qracex=1 and grp_hisp=0) then do;
            grp_white=1;
        end;
        grp_black=0;
        if (qracex=2 and grp_hisp=0) then do;
            grp_black=1;
        end;
        * groups;
        grp_other=0;
        grp_c=4;
        if (grp_white=1) then grp_c=1;
        if (grp_black=1) then grp_c=2;
        if (grp_hisp=1) then grp_c=3;
        if (grp_c=4) then grp_other=1;
        * Industry sector separated from, using NAICS;
        sep_sec_all=substr(sein_naics6,1,2);
        sep_sec_c=input(put(sep_sec_all,industry.),1.);
        * state city, for tabulation;
        stmet_name_c=0;
        if (stmet_name='inind') then stmet_name_c=1;
        if (stmet_name='ilchi') then stmet_name_c=2;
        if (stmet_name='midet') then stmet_name_c=3;
        if (stmet_name='ohcol') then stmet_name_c=4;
        if (stmet_name='ohcle') then stmet_name_c=5;
        if (stmet_name='mnmin') then stmet_name_c=6;
        if (stmet_name='nybuf') then stmet_name_c=7;
        if (stmet_name='papit') then stmet_name_c=8;
        if (stmet_name='wimil') then stmet_name_c=9;
        * age age_c;
        age_c=0;
        if (age>=20 and age<25) then do;
            age_c=1;
            age_20_24=1;
        end;
        else age_20_24=0;
        if (age>=25 and age<35) then do;
            age_c=2;
            age_25_34=1;
        end;
        else age_25_34=0;
        if (age>=35 and age<45) then do;
            age_c=3;
            age_35_44=1;
        end;
        else age_35_44=0;
        if (age>=45 and age<55) then do;
            age_c=4;
            age_45_54=1;
        end;
        else age_45_54=0;
        if (age>=55 and age<65) then do;
            age_c=5;
            age_55_64=1;
        end;
        else age_55_64=0;
        * normalized variables;
        earn_ln=log(earntotall);
        huearn_ln=log(huearn);
        huearnperwork=huearn/huearners;
        huearnperwork_ln=log(huearnperwork);
    run;

    proc freq data=OUTDATA.lostjob_national;
        title 'proximity impute assessment in restricted sample';
        table sep_prox;
    run;

    proc freq data=OUTDATA.lostjob_national;
        title 'metros and and years';
        table stmet_name*sep_prevyear;
    run;

    proc contents data=OUTDATA.lostjob_national;
    run;

%mend lostjob_run4;

/* run step 5 --------------------------------------------------------------------------- */
%macro lostjob_run5;
    
* predict auto ownership (use estimates from modechoise.sas);
    * use this portion for forcasting;
    data lostjob_predict
        (keep=pik sep_qtr sep_year stmet_name_c h_stfid sep_sec_c grp_c
        &modevars.);
        * rename some variables to match mode choice model params;
        set OUTDATA.lostjob_national;
        * auto to transit travel time ratio;
        if (sep_auto=0) then ratio_tran_auto=0;
        else ratio_tran_auto=(sep_tran-sep_auto)/(0.5*(sep_tran+sep_auto));
    run;
    
    * calculate predicted auto count;
    proc logistic inmodel=OUTDATA.model_auto;
        score data=lostjob_predict
        out=lostjob_predict;
    run;
    * expected autos, given that has an auto;
    data lostjob_predict;
        set lostjob_predict (rename=(ratio_tran_auto=sep_ratio_tran_auto));
        pauto=P_1+P_2+P_3;
        predict_wauto_cnt=1*(P_1/pauto)+
                          2*(P_2/pauto)+
                          3*(P_3/pauto);
        * calculate auto variables here;
        auto_earner=predict_wauto_cnt/(huearners);
        auto_pik=predict_wauto_cnt/(pikcnt);
    run;
    
    * assemble proximity data (limit to 60 minutes maximum travel time considered below);
    data proxmeas;
        set
        %let metord=1;
        %do %until("%scan(&metrolist.,&metord.)"="");
            %let met=%scan(&metrolist.,&metord.);
            %let stmet=%scan(&stmetrolist.,&metord.);
            OUTDATA.proxmeas_&met.
            (keep=d_stfid o_stfid auto tran dist
            where=((auto lt 60) or (tran lt 60) or (dist lt 20)))
            %let metord=%eval(&metord+1);
        %end;
        ;
    run;
    proc sort data=proxmeas;
        by d_stfid o_stfid;
    run;
    * create commute route variables;
    data proxmeas;
        set proxmeas (in=in_prox);
        * create variable for transit inefficiency on each OD route;
        * auto to transit travel time ratio;
        if (auto=0 or auto=.) then ratio_tran_auto=0;
        else ratio_tran_auto=(tran-auto)/(0.5*(tran+auto));
    run;
    * the output here will be big (need a national proxmeas file);
    proc sort data=lostjob_predict (drop=P_:);
        by h_stfid;
    run;
    proc sort data=proxmeas;
        by o_stfid;
    run;
    proc sql nowarn;
        create table lostjob_predict_od as
            select *
            from lostjob_predict, proxmeas
            where lostjob_predict.h_stfid=proxmeas.o_stfid
        ;
    quit;

    * predict transit use on each route given auto status;
    * (use estimates from modechoise.sas);
    * calculate predicted transit given no autos;
    proc logistic inmodel=OUTDATA.model_tran_0auto;
        score data=lostjob_predict_od (drop=o_stfid rename=(d_stfid=w_stfid))
        out=lostjob_predict_od;
    run;
    * calculate predicted transit given has autos;
    proc logistic inmodel=OUTDATA.model_tran_wauto;
        score data=lostjob_predict_od (drop=P_0 rename=(P_1=pred_0auto_tran))
        out=lostjob_predict_od;
    run;
        
    * assemble attribute data on jobs (select variables);
    data attrib_otm_job;
        set
        %let metord=1;
        %do %until("%scan(&metrolist.,&metord.)"="");
            %let met=%scan(&metrolist.,&metord.);
            %let stmet=%scan(&stmetrolist.,&metord.);
            OUTDATA.attrib_otm_job_&met.
            (keep=stfid year &use_otmvars2. rename=(stfid=w_stfid  year=sep_year))
            %let metord=%eval(&metord+1);
        %end;
        ;
    run;
    proc sort data=attrib_otm_job;
        by w_stfid sep_year;
    run;
    
    * assemble competing searcher information on jobs;
    data compete;
        set
        %let metord=1;
        %do %until("%scan(&metrolist.,&metord.)"="");
            %let met=%scan(&metrolist.,&metord.);
            %let stmet=%scan(&stmetrolist.,&metord.);
            OUTDATA.compete_&met.
            (keep=d_stfid year indxx_auto_o_lab: rings_dist_o_lab: rename=(d_stfid=w_stfid  year=sep_year))
            %let metord=%eval(&metord+1);
        %end;
        ;
    run;
    proc sort data=compete;
        by w_stfid sep_year;
    run;

    * add on competing population as well;
    data compete;
        merge compete (in=in_comp)
              OUTDATA.compete_cen2000 (keep=d_stfid indxx_auto_o_lab: rename=(d_stfid=w_stfid))
        ;
        by w_stfid;
    run;

    * merge attributes and competing searchers onto potential commutes;
    proc sort data=lostjob_predict_od;
        by w_stfid sep_year;
    run;
    data lostjob_predict_od;
        merge lostjob_predict_od (in=in_pred)
              attrib_otm_job (in=in_attrib)
              compete (in=in_compete)
              ;
        by w_stfid sep_year;
        * only retain prediction routes from actual origins;
        if (in_pred);
        * only retain routes that have jobs (if no jobs, then no index contribution);
        if (in_attrib);
    run;

    * contents;
    proc contents data=lostjob_predict_od;
    run;

    * calculate predictions for each pik;            
    * predict travel time for each route (could use retain and skip next step);
    data lostjob_predict_od;
        set lostjob_predict_od (drop=P_0 rename=(P_1=pred_wauto_tran));
        * probability of transit (see Appendix B);
        ptran=pauto*pred_wauto_tran+(1-pauto)*pred_0auto_tran;
        ptime=ptran*tran+(1-ptran)*auto;
        * set up for job access calcs, go through each job type;
        %let otmvar=1;
        %do %until("%scan(&use_otmvars2.,&otmvar.)"="");
            %let otmvarname=%scan(&use_otmvars2.,&otmvar.);
            * calculate index components right here, to be summed;
            * exponential discount - auto;
            if (auto lt 15) then index_auto_&otmvarname._15up_ex=&otmvarname.;
            else if (auto lt 60) then 
                index_auto_&otmvarname._15up_ex=&otmvarname./(exp(0.1*(max(1,auto-15))));
            else index_auto_&otmvarname._15up_ex=0;
            if (auto lt 10) then index_auto_&otmvarname._10up_ex=&otmvarname.;
            else if (auto lt 60) then 
                index_auto_&otmvarname._10up_ex=&otmvarname./(exp(0.1*(max(1,auto-10))));
            else index_auto_&otmvarname._10up_ex=0;
            if (auto lt 5) then index_auto_&otmvarname._5up_ex=&otmvarname.;
            else if (auto lt 60) then 
                index_auto_&otmvarname._5up_ex=&otmvarname./(exp(0.1*(max(1,auto-5))));
            else index_auto_&otmvarname._5up_ex=0;
            * exponential discount - transit component only;
            if (tran lt 5) then index_tran_&otmvarname._10up_ex=&otmvarname.;
            else if (tran lt 40) then 
                index_tran_&otmvarname._5up_ex=&otmvarname./(exp(0.1*(max(1,tran-5))));
            else index_tran_&otmvarname._5up_ex=0;
            if (tran lt 10) then index_tran_&otmvarname._10up_ex=&otmvarname.;
            else if (tran lt 40) then 
                index_tran_&otmvarname._10up_ex=&otmvarname./(exp(0.1*(max(1,tran-10))));
            else index_tran_&otmvarname._10up_ex=0;
            if (tran lt 15) then index_tran_&otmvarname._15up_ex=&otmvarname.;
            else if (tran lt 40) then 
                index_tran_&otmvarname._15up_ex=&otmvarname./(exp(0.1*(max(1,tran-15))));
            else index_tran_&otmvarname._15up_ex=0;
            * distance;
            if (dist lt 1) then index_dist_&otmvarname._1up_ex=&otmvarname.;
            else if (dist lt 60) then 
                index_dist_&otmvarname._1up_ex=&otmvarname./(exp(0.1*(max(1,dist-1))));
            else index_dist_&otmvarname._1up_ex=0;
            * probability weighted job opportunities;
            index_predtran_&otmvarname._5up_ex=ptran*index_tran_&otmvarname._5up_ex;
            index_predauto_&otmvarname._5up_ex=(1-ptran)*index_auto_&otmvarname._5up_ex;
            index_predboth_&otmvarname._5up_ex=index_predtran_&otmvarname._5up_ex+
                                                index_predauto_&otmvarname._5up_ex;
            index_predtran_&otmvarname._10up_ex=ptran*index_tran_&otmvarname._10up_ex;
            index_predauto_&otmvarname._10up_ex=(1-ptran)*index_auto_&otmvarname._10up_ex;
            index_predboth_&otmvarname._10up_ex=index_predtran_&otmvarname._10up_ex+
                                                index_predauto_&otmvarname._10up_ex;
            index_predtran_&otmvarname._15up_ex=ptran*index_tran_&otmvarname._15up_ex;
            index_predauto_&otmvarname._15up_ex=(1-ptran)*index_auto_&otmvarname._15up_ex;
            index_predboth_&otmvarname._15up_ex=index_predtran_&otmvarname._15up_ex+
                                                index_predauto_&otmvarname._15up_ex;
            * get rid of missing values (assume that CS is auto);
            indxx_auto_o_lab_5_&otmvarname.=max(0,indxx_auto_o_lab_5_&otmvarname.);
            indxx_auto_o_lab_10_&otmvarname.=max(0,indxx_auto_o_lab_10_&otmvarname.);
            indxx_auto_o_lab_15_&otmvarname.=max(0,indxx_auto_o_lab_15_&otmvarname.);
            * competing searchers, for tracts actually reached;
            indcs_auto_&otmvarname._5up_ex=
                index_auto_&otmvarname._5up_ex*indxx_auto_o_lab_5_&otmvarname.;
            indcs_tran_&otmvarname._5up_ex=
                index_tran_&otmvarname._5up_ex*indxx_auto_o_lab_5_&otmvarname.;
            indcs_predboth_&otmvarname._5up_ex=
                index_predboth_&otmvarname._5up_ex*indxx_auto_o_lab_5_&otmvarname.;
            indcs_predauto_&otmvarname._5up_ex=
                index_predauto_&otmvarname._5up_ex*indxx_auto_o_lab_5_&otmvarname.;
            indcs_predtran_&otmvarname._5up_ex=
                index_predtran_&otmvarname._5up_ex*indxx_auto_o_lab_5_&otmvarname.;
            indcs_auto_&otmvarname._10up_ex=
                index_auto_&otmvarname._10up_ex*indxx_auto_o_lab_10_&otmvarname.;
            indcs_tran_&otmvarname._10up_ex=
                index_tran_&otmvarname._10up_ex*indxx_auto_o_lab_10_&otmvarname.;
            indcs_predboth_&otmvarname._10up_ex=
                index_predboth_&otmvarname._10up_ex*indxx_auto_o_lab_10_&otmvarname.;
            indcs_predauto_&otmvarname._10up_ex=
                index_predauto_&otmvarname._10up_ex*indxx_auto_o_lab_10_&otmvarname.;
            indcs_predtran_&otmvarname._10up_ex=
                index_predtran_&otmvarname._10up_ex*indxx_auto_o_lab_10_&otmvarname.;
            indcs_auto_&otmvarname._15up_ex=
                index_auto_&otmvarname._15up_ex*indxx_auto_o_lab_15_&otmvarname.;
            indcs_tran_&otmvarname._15up_ex=
                index_tran_&otmvarname._15up_ex*indxx_auto_o_lab_15_&otmvarname.;
            indcs_predboth_&otmvarname._15up_ex=
                index_predboth_&otmvarname._15up_ex*indxx_auto_o_lab_15_&otmvarname.;
            indcs_predauto_&otmvarname._15up_ex=
                index_predauto_&otmvarname._15up_ex*indxx_auto_o_lab_15_&otmvarname.;
            indcs_predtran_&otmvarname._15up_ex=
                index_predtran_&otmvarname._15up_ex*indxx_auto_o_lab_15_&otmvarname.;
            %let otmvar=%eval(&otmvar+1);
        %end;
        * out of otm loop;
        * alternate competing searcher definitions;
        indcs_auto_pop16lab_10up_ex=
            index_auto_all_10up_ex*indxx_auto_o_lab_10_pop16lab;
        indcs_predboth_pop16lab_10up_ex=
            index_predboth_all_10up_ex*indxx_auto_o_lab_10_pop16lab;
        * create access vars specific to a worker (same or other, jobs or competing searchers);
        array _sec_job_auto(5) index_auto_sc1_10up_ex index_auto_sc2_10up_ex 
            index_auto_sc3_10up_ex index_auto_sc4_10up_ex index_auto_sc5_10up_ex;
        array _sec_job_tran(5) index_tran_sc1_10up_ex index_tran_sc2_10up_ex 
            index_tran_sc3_10up_ex index_tran_sc4_10up_ex index_tran_sc5_10up_ex;
        array _sec_lab_auto(5) indxx_auto_o_lab_10_sc1 indxx_auto_o_lab_10_sc2
            indxx_auto_o_lab_10_sc3 indxx_auto_o_lab_10_sc4 indxx_auto_o_lab_10_sc5;
        index_auto_scs_10up_ex=_sec_job_auto(sep_sec_c);
        index_auto_sco_10up_ex=index_auto_all_10up_ex-_sec_job_auto(sep_sec_c);
        indcs_auto_scsa_10up_ex=index_auto_scs_10up_ex*indxx_auto_o_lab_10_all;
        indcs_auto_scoa_10up_ex=index_auto_sco_10up_ex*indxx_auto_o_lab_10_all;
        indcs_auto_scss_10up_ex=index_auto_scs_10up_ex*_sec_lab_auto(sep_sec_c);
        indcs_auto_scso_10up_ex=index_auto_scs_10up_ex*(indxx_auto_o_lab_10_all-_sec_lab_auto(sep_sec_c));
        indcs_auto_scos_10up_ex=index_auto_sco_10up_ex*_sec_lab_auto(sep_sec_c);
        indcs_auto_scoo_10up_ex=index_auto_sco_10up_ex*(indxx_auto_o_lab_10_all-_sec_lab_auto(sep_sec_c));
        index_tran_scs_10up_ex=_sec_job_tran(sep_sec_c);
        index_tran_sco_10up_ex=index_tran_all_10up_ex-_sec_job_tran(sep_sec_c);
        indcs_tran_scsa_10up_ex=index_tran_scs_10up_ex*indxx_auto_o_lab_10_all;
        indcs_tran_scoa_10up_ex=index_tran_sco_10up_ex*indxx_auto_o_lab_10_all;
        indcs_tran_scss_10up_ex=index_tran_scs_10up_ex*_sec_lab_auto(sep_sec_c);
        indcs_tran_scso_10up_ex=index_tran_scs_10up_ex*(indxx_auto_o_lab_10_all-_sec_lab_auto(sep_sec_c));
        indcs_tran_scos_10up_ex=index_tran_sco_10up_ex*_sec_lab_auto(sep_sec_c);
        indcs_tran_scoo_10up_ex=index_tran_sco_10up_ex*(indxx_auto_o_lab_10_all-_sec_lab_auto(sep_sec_c));
        index_predboth_scs_10up_ex=ptran*index_tran_scs_10up_ex+(1-ptran)*index_auto_scs_10up_ex;
        index_predboth_sco_10up_ex=ptran*index_tran_sco_10up_ex+(1-ptran)*index_auto_sco_10up_ex;
        indcs_predboth_scsa_10up_ex=index_predboth_scs_10up_ex*indxx_auto_o_lab_10_all;
        indcs_predboth_scoa_10up_ex=index_predboth_sco_10up_ex*indxx_auto_o_lab_10_all;
        indcs_predboth_scss_10up_ex=index_predboth_scs_10up_ex*_sec_lab_auto(sep_sec_c);
        indcs_predboth_scso_10up_ex=index_predboth_scs_10up_ex*(indxx_auto_o_lab_10_all-_sec_lab_auto(sep_sec_c));
        indcs_predboth_scos_10up_ex=index_predboth_sco_10up_ex*_sec_lab_auto(sep_sec_c);
        indcs_predboth_scoo_10up_ex=index_predboth_sco_10up_ex*(indxx_auto_o_lab_10_all-_sec_lab_auto(sep_sec_c));
        * same and other for group;
        array _grp_job_auto(4) index_auto_gp1_10up_ex index_auto_gp2_10up_ex 
            index_auto_gp3_10up_ex index_auto_gp4_10up_ex;
        array _grp_job_tran(4) index_tran_gp1_10up_ex index_tran_gp2_10up_ex 
            index_tran_gp3_10up_ex index_tran_gp4_10up_ex;
        array _grp_lab_auto(4) indxx_auto_o_lab_10_gp1 indxx_auto_o_lab_10_gp2
            indxx_auto_o_lab_10_gp3 indxx_auto_o_lab_10_gp4;
        index_auto_gps_10up_ex=_grp_job_auto(grp_c);
        index_auto_gpo_10up_ex=index_auto_all_10up_ex-_grp_job_auto(grp_c);
        indcs_auto_gpsa_10up_ex=index_auto_gps_10up_ex*indxx_auto_o_lab_10_all;
        indcs_auto_gpoa_10up_ex=index_auto_gpo_10up_ex*indxx_auto_o_lab_10_all;
        indcs_auto_gpss_10up_ex=index_auto_gps_10up_ex*_grp_lab_auto(grp_c);
        indcs_auto_gpos_10up_ex=index_auto_gpo_10up_ex*_grp_lab_auto(grp_c);
        indcs_auto_gpas_10up_ex=index_auto_all_10up_ex*_grp_lab_auto(grp_c);
        indcs_auto_gpao_10up_ex=index_auto_all_10up_ex*(indxx_auto_o_lab_10_all-_grp_lab_auto(grp_c));
        index_tran_gps_10up_ex=_grp_job_tran(grp_c);
        index_tran_gpo_10up_ex=index_tran_all_10up_ex-_grp_job_tran(grp_c);
        indcs_tran_gpsa_10up_ex=index_tran_gps_10up_ex*indxx_auto_o_lab_10_all;
        indcs_tran_gpoa_10up_ex=index_tran_gpo_10up_ex*indxx_auto_o_lab_10_all;
        indcs_tran_gpss_10up_ex=index_tran_gps_10up_ex*_grp_lab_auto(grp_c);
        indcs_tran_gpos_10up_ex=index_tran_gpo_10up_ex*_grp_lab_auto(grp_c);
        indcs_tran_gpas_10up_ex=index_tran_all_10up_ex*_grp_lab_auto(grp_c);
        indcs_tran_gpao_10up_ex=index_tran_all_10up_ex*(indxx_auto_o_lab_10_all-_grp_lab_auto(grp_c));
        index_predboth_gps_10up_ex=ptran*index_tran_gps_10up_ex+(1-ptran)*index_auto_gps_10up_ex;
        index_predboth_gpo_10up_ex=ptran*index_tran_gpo_10up_ex+(1-ptran)*index_auto_gpo_10up_ex;
        indcs_predboth_gpsa_10up_ex=index_predboth_gps_10up_ex*indxx_auto_o_lab_10_all;
        indcs_predboth_gpoa_10up_ex=index_predboth_gpo_10up_ex*indxx_auto_o_lab_10_all;
        indcs_predboth_gpss_10up_ex=index_predboth_gps_10up_ex*_grp_lab_auto(grp_c);
        indcs_predboth_gpos_10up_ex=index_predboth_gpo_10up_ex*_grp_lab_auto(grp_c);
        indcs_predboth_gpas_10up_ex=index_predboth_all_10up_ex*_grp_lab_auto(grp_c);
        indcs_predboth_gpao_10up_ex=index_predboth_all_10up_ex*(indxx_auto_o_lab_10_all-_grp_lab_auto(grp_c));
        * distance and race checks (3 miles is similar in size to a ZIP code);
        %let otmvar=1;
            %do %until("%scan(all &use_otmgrp2.,&otmvar.)"="");        
            %let otmvarname=%scan(all &use_otmgrp2.,&otmvar.);
            if (dist lt 3) then index_dist_&otmvarname._3mi_ex=&otmvarname.;
            else index_dist_&otmvarname._3mi_ex=0;
            %let otmvar=%eval(&otmvar+1);
        %end;
        indcs_dist_all_3mi_ex=index_dist_all_3mi_ex*max(0,rings_dist_o_lab_3_all);
        array _grp_job_dist(4) index_dist_gp1_3mi_ex index_dist_gp2_3mi_ex index_dist_gp3_3mi_ex index_dist_gp4_3mi_ex;
        array _grp_lab_dist(4) rings_dist_o_lab_3_gp1 rings_dist_o_lab_3_gp2 rings_dist_o_lab_3_gp3 rings_dist_o_lab_3_gp4;
        index_dist_gps_3mi_ex=_grp_job_dist(grp_c);
        index_dist_gpo_3mi_ex=index_dist_all_3mi_ex-_grp_job_dist(grp_c);
        indcs_dist_gpsa_3mi_ex=index_dist_gps_3mi_ex*max(0,rings_dist_o_lab_3_all);
        indcs_dist_gpoa_3mi_ex=index_dist_gpo_3mi_ex*max(0,rings_dist_o_lab_3_all);
        indcs_dist_gpss_3mi_ex=index_dist_gps_3mi_ex*max(0,_grp_lab_dist(grp_c));
        indcs_dist_gpos_3mi_ex=index_dist_gpo_3mi_ex*max(0,_grp_lab_dist(grp_c));
        indcs_dist_gpas_3mi_ex=index_dist_all_3mi_ex*max(0,_grp_lab_dist(grp_c));
        indcs_dist_gpao_3mi_ex=index_dist_all_3mi_ex*max(0,rings_dist_o_lab_3_all-_grp_lab_dist(grp_c));
    run;
    * extract a (small) sample for review;
    data OUTDATA.lostjob_predict_od;
        set lostjob_predict_od (obs=10000
            keep=pik sep_qtr h_stfid w_stfid auto tran ptime pauto ptran
            index_predtran_all_10up_ex index_predauto_all_10up_ex
            index_predboth_all_10up_ex indcs_predboth_all_10up_ex 
            index_predboth_all_15up_ex indcs_predboth_all_15up_ex 
            index_auto_all_10up_ex index_tran_all_10up_ex
            indcs_predboth_pop16lab_10up_ex indcs_auto_pop16lab_10up_ex
            index_auto_scs_10up_ex
            index_auto_sco_10up_ex
            indcs_auto_scsa_10up_ex
            indcs_auto_scoa_10up_ex
            indcs_auto_scss_10up_ex
            indcs_auto_scso_10up_ex
            indcs_auto_scos_10up_ex
            indcs_auto_scoo_10up_ex
            index_tran_scs_10up_ex
            index_tran_sco_10up_ex
            indcs_tran_scsa_10up_ex
            indcs_tran_scoa_10up_ex
            indcs_tran_scss_10up_ex
            indcs_tran_scso_10up_ex
            indcs_tran_scos_10up_ex
            indcs_tran_scoo_10up_ex
            index_predboth_scs_10up_ex
            index_predboth_sco_10up_ex
            indcs_predboth_scsa_10up_ex
            indcs_predboth_scoa_10up_ex
            indcs_predboth_scss_10up_ex
            indcs_predboth_scso_10up_ex
            indcs_predboth_scos_10up_ex
            indcs_predboth_scoo_10up_ex
            /* group */
            index_auto_gps_10up_ex
            index_auto_gpo_10up_ex
            indcs_auto_gpsa_10up_ex
            indcs_auto_gpoa_10up_ex
            indcs_auto_gpss_10up_ex
            indcs_auto_gpos_10up_ex
            indcs_auto_gpas_10up_ex
            indcs_auto_gpao_10up_ex
            index_tran_gps_10up_ex
            index_tran_gpo_10up_ex
            indcs_tran_gpsa_10up_ex
            indcs_tran_gpoa_10up_ex
            indcs_tran_gpss_10up_ex
            indcs_tran_gpos_10up_ex
            indcs_tran_gpas_10up_ex
            indcs_tran_gpao_10up_ex
            index_predboth_gps_10up_ex
            index_predboth_gpo_10up_ex
            indcs_predboth_gpsa_10up_ex
            indcs_predboth_gpoa_10up_ex
            indcs_predboth_gpss_10up_ex
            indcs_predboth_gpos_10up_ex
            indcs_predboth_gpas_10up_ex
            indcs_predboth_gpao_10up_ex
            /* group dist */
            index_dist_all_3mi_ex
            index_dist_gps_3mi_ex
            index_dist_gpo_3mi_ex
            indcs_dist_all_3mi_ex
            indcs_dist_gpsa_3mi_ex
            indcs_dist_gpoa_3mi_ex
            indcs_dist_gpss_3mi_ex
            indcs_dist_gpos_3mi_ex
            indcs_dist_gpas_3mi_ex
            indcs_dist_gpao_3mi_ex
            );
    run;        
    * collapse accessibilty across all destinations by pik;
    proc summary data=lostjob_predict_od nway;
        class pik sep_qtr h_stfid;
        var ptran
            %let otmvar=1;
            %do %until("%scan(&use_otmvars2.,&otmvar.)"="");
            %let otmvarname=%scan(&use_otmvars2.,&otmvar.);
            index_dist_&otmvarname._1up_ex
            index_auto_&otmvarname._5up_ex
            index_tran_&otmvarname._5up_ex
            index_predtran_&otmvarname._5up_ex
            index_predauto_&otmvarname._5up_ex
            index_predboth_&otmvarname._5up_ex
            indcs_auto_&otmvarname._5up_ex
            indcs_tran_&otmvarname._5up_ex
            indcs_predboth_&otmvarname._5up_ex
            indcs_predauto_&otmvarname._5up_ex
            indcs_predtran_&otmvarname._5up_ex
            index_auto_&otmvarname._10up_ex
            index_tran_&otmvarname._10up_ex
            index_predtran_&otmvarname._10up_ex
            index_predauto_&otmvarname._10up_ex
            index_predboth_&otmvarname._10up_ex
            indcs_auto_&otmvarname._10up_ex
            indcs_tran_&otmvarname._10up_ex
            indcs_predboth_&otmvarname._10up_ex
            indcs_predauto_&otmvarname._10up_ex
            indcs_predtran_&otmvarname._10up_ex
            index_auto_&otmvarname._15up_ex
            index_tran_&otmvarname._15up_ex
            index_predtran_&otmvarname._15up_ex
            index_predauto_&otmvarname._15up_ex
            index_predboth_&otmvarname._15up_ex
            indcs_auto_&otmvarname._15up_ex
            indcs_tran_&otmvarname._15up_ex
            indcs_predboth_&otmvarname._15up_ex
            indcs_predauto_&otmvarname._15up_ex
            indcs_predtran_&otmvarname._15up_ex
            %let otmvar=%eval(&otmvar+1);
            %end;
            indcs_predboth_pop16lab_10up_ex indcs_auto_pop16lab_10up_ex
            index_auto_scs_10up_ex
            index_auto_sco_10up_ex
            indcs_auto_scsa_10up_ex
            indcs_auto_scoa_10up_ex
            indcs_auto_scss_10up_ex
            indcs_auto_scso_10up_ex
            index_tran_scs_10up_ex
            index_tran_sco_10up_ex
            indcs_tran_scsa_10up_ex
            indcs_tran_scoa_10up_ex
            indcs_tran_scss_10up_ex
            indcs_tran_scso_10up_ex
            indcs_tran_scos_10up_ex
            indcs_tran_scoo_10up_ex
            index_predboth_scs_10up_ex
            index_predboth_sco_10up_ex
            indcs_predboth_scsa_10up_ex
            indcs_predboth_scoa_10up_ex
            indcs_predboth_scss_10up_ex
            indcs_predboth_scso_10up_ex
            indcs_predboth_scos_10up_ex
            indcs_predboth_scoo_10up_ex
            /* group */
            index_auto_gps_10up_ex
            index_auto_gpo_10up_ex
            indcs_auto_gpsa_10up_ex
            indcs_auto_gpoa_10up_ex
            indcs_auto_gpss_10up_ex
            indcs_auto_gpos_10up_ex
            indcs_auto_gpas_10up_ex
            indcs_auto_gpao_10up_ex
            index_tran_gps_10up_ex
            index_tran_gpo_10up_ex
            indcs_tran_gpsa_10up_ex
            indcs_tran_gpoa_10up_ex
            indcs_tran_gpss_10up_ex
            indcs_tran_gpos_10up_ex
            indcs_tran_gpas_10up_ex
            indcs_tran_gpao_10up_ex
            index_predboth_gps_10up_ex
            index_predboth_gpo_10up_ex
            indcs_predboth_gpsa_10up_ex
            indcs_predboth_gpoa_10up_ex
            indcs_predboth_gpss_10up_ex
            indcs_predboth_gpos_10up_ex
            indcs_predboth_gpas_10up_ex
            indcs_predboth_gpao_10up_ex
            /* group dist */
            index_dist_all_3mi_ex
            index_dist_gps_3mi_ex
            index_dist_gpo_3mi_ex
            indcs_dist_all_3mi_ex
            indcs_dist_gpsa_3mi_ex
            indcs_dist_gpoa_3mi_ex
            indcs_dist_gpss_3mi_ex
            indcs_dist_gpos_3mi_ex
            indcs_dist_gpas_3mi_ex
            indcs_dist_gpao_3mi_ex
            ;
        id pauto predict_wauto_cnt ratio_tran_auto;
        output out=OUTDATA.lostjob_access (drop=_TYPE_ _FREQ_ ptran)
        sum= mean(ptran)=ptran_avg;
    run;
    * normalize competing searchers measures for average number of job opportunities;
    %macro normratio(pref,suba,subb,post);
        if (index_&pref._&suba._&post.=0) then do;
            indcs_&pref._&subb._&post.=0;
        end;
        else do;
            indcs_&pref._&subb._&post.=indcs_&pref._&subb._&post./index_&pref._&suba._&post.;
        end;
    %mend;
    data OUTDATA.lostjob_access;
        set OUTDATA.lostjob_access;
        %let otmvar=1;
        %do %until("%scan(&use_otmvars2.,&otmvar.)"="");
        %let otmvarname=%scan(&use_otmvars2.,&otmvar.);
        if (index_auto_&otmvarname._5up_ex=0) then indcs_auto_&otmvarname._5up_ex=0;
        else indcs_auto_&otmvarname._5up_ex=
            indcs_auto_&otmvarname._5up_ex/index_auto_&otmvarname._5up_ex;
        if (index_tran_&otmvarname._5up_ex=0) then indcs_tran_&otmvarname._5up_ex=0;
        else indcs_tran_&otmvarname._5up_ex=
            indcs_tran_&otmvarname._5up_ex/index_tran_&otmvarname._5up_ex;
        if (index_predboth_&otmvarname._5up_ex=0) then indcs_predboth_&otmvarname._5up_ex=0;
        else indcs_predboth_&otmvarname._5up_ex=
            indcs_predboth_&otmvarname._5up_ex/index_predboth_&otmvarname._5up_ex;
        if (index_predauto_&otmvarname._5up_ex=0) then indcs_predauto_&otmvarname._5up_ex=0;
        else indcs_predauto_&otmvarname._5up_ex=
            indcs_predauto_&otmvarname._5up_ex/index_predauto_&otmvarname._5up_ex;
        if (index_predtran_&otmvarname._5up_ex=0) then indcs_predtran_&otmvarname._5up_ex=0;
        else indcs_predtran_&otmvarname._5up_ex=
            indcs_predtran_&otmvarname._5up_ex/index_predtran_&otmvarname._5up_ex;

        if (index_auto_&otmvarname._10up_ex=0) then indcs_auto_&otmvarname._10up_ex=0;
        else indcs_auto_&otmvarname._10up_ex=
            indcs_auto_&otmvarname._10up_ex/index_auto_&otmvarname._10up_ex;
        if (index_tran_&otmvarname._10up_ex=0) then indcs_tran_&otmvarname._10up_ex=0;
        else indcs_tran_&otmvarname._10up_ex=
            indcs_tran_&otmvarname._10up_ex/index_tran_&otmvarname._10up_ex;
        if (index_predboth_&otmvarname._10up_ex=0) then indcs_predboth_&otmvarname._10up_ex=0;
        else indcs_predboth_&otmvarname._10up_ex=
            indcs_predboth_&otmvarname._10up_ex/index_predboth_&otmvarname._10up_ex;
        if (index_predauto_&otmvarname._10up_ex=0) then indcs_predauto_&otmvarname._10up_ex=0;
        else indcs_predauto_&otmvarname._10up_ex=
            indcs_predauto_&otmvarname._10up_ex/index_predauto_&otmvarname._10up_ex;
        if (index_predtran_&otmvarname._10up_ex=0) then indcs_predtran_&otmvarname._10up_ex=0;
        else indcs_predtran_&otmvarname._10up_ex=
            indcs_predtran_&otmvarname._10up_ex/index_predtran_&otmvarname._10up_ex;

        if (index_auto_&otmvarname._15up_ex=0) then indcs_auto_&otmvarname._15up_ex=0;
        else indcs_auto_&otmvarname._15up_ex=
            indcs_auto_&otmvarname._15up_ex/index_auto_&otmvarname._15up_ex;
        if (index_tran_&otmvarname._15up_ex=0) then indcs_tran_&otmvarname._15up_ex=0;
        else indcs_tran_&otmvarname._15up_ex=
            indcs_tran_&otmvarname._15up_ex/index_tran_&otmvarname._15up_ex;
        if (index_predboth_&otmvarname._15up_ex=0) then indcs_predboth_&otmvarname._15up_ex=0;
        else indcs_predboth_&otmvarname._15up_ex=
            indcs_predboth_&otmvarname._15up_ex/index_predboth_&otmvarname._15up_ex;
        if (index_predauto_&otmvarname._15up_ex=0) then indcs_predauto_&otmvarname._15up_ex=0;
        else indcs_predauto_&otmvarname._15up_ex=
            indcs_predauto_&otmvarname._15up_ex/index_predauto_&otmvarname._15up_ex;
        if (index_predtran_&otmvarname._15up_ex=0) then indcs_predtran_&otmvarname._15up_ex=0;
        else indcs_predtran_&otmvarname._15up_ex=
            indcs_predtran_&otmvarname._15up_ex/index_predtran_&otmvarname._15up_ex;
        %let otmvar=%eval(&otmvar+1);
        %end;
        %normratio(auto,scs,scsa,10up_ex);
        %normratio(auto,sco,scoa,10up_ex);
        %normratio(auto,scs,scss,10up_ex);
        %normratio(auto,scs,scso,10up_ex);
        %normratio(auto,sco,scos,10up_ex);
        %normratio(auto,sco,scoo,10up_ex);
        %normratio(tran,scs,scsa,10up_ex);
        %normratio(tran,sco,scoa,10up_ex);
        %normratio(tran,scs,scss,10up_ex);
        %normratio(tran,scs,scso,10up_ex);
        %normratio(tran,sco,scos,10up_ex);
        %normratio(tran,sco,scoo,10up_ex);
        %normratio(predboth,scs,scsa,10up_ex);
        %normratio(predboth,sco,scoa,10up_ex);
        %normratio(predboth,scs,scss,10up_ex);
        %normratio(predboth,scs,scso,10up_ex);
        %normratio(predboth,sco,scos,10up_ex);
        %normratio(predboth,sco,scoo,10up_ex);
        /* group */
        %normratio(auto,gps,gpsa,10up_ex);
        %normratio(auto,gpo,gpoa,10up_ex);
        %normratio(auto,gps,gpss,10up_ex);
        %normratio(auto,gpo,gpos,10up_ex);
        %normratio(auto,all,gpas,10up_ex);
        %normratio(auto,all,gpao,10up_ex);
        %normratio(tran,gps,gpsa,10up_ex);
        %normratio(tran,gpo,gpoa,10up_ex);
        %normratio(tran,gps,gpss,10up_ex);
        %normratio(tran,gpo,gpos,10up_ex);
        %normratio(tran,all,gpas,10up_ex);
        %normratio(tran,all,gpao,10up_ex);
        %normratio(predboth,gps,gpsa,10up_ex);
        %normratio(predboth,gpo,gpoa,10up_ex);
        %normratio(predboth,gps,gpss,10up_ex);
        %normratio(predboth,gpo,gpos,10up_ex);
        %normratio(predboth,all,gpas,10up_ex);
        %normratio(predboth,all,gpao,10up_ex);
        /* group dist */
        %normratio(dist,all,all,3mi_ex);
        %normratio(dist,gps,gpsa,3mi_ex);
        %normratio(dist,gpo,gpoa,3mi_ex);
        %normratio(dist,gps,gpss,3mi_ex);
        %normratio(dist,gpo,gpos,3mi_ex);
        %normratio(dist,all,gpas,3mi_ex);
        %normratio(dist,all,gpao,3mi_ex);
        /* other baseline populations */
        if (index_auto_all_10up_ex=0) then indcs_auto_pop16lab_10up_ex=0;
        else indcs_auto_pop16lab_10up_ex=
            indcs_auto_pop16lab_10up_ex/index_auto_all_10up_ex;
        if (index_predboth_all_10up_ex=0) then indcs_predboth_pop16lab_10up_ex=0;
        else indcs_predboth_pop16lab_10up_ex=
            indcs_predboth_pop16lab_10up_ex/index_predboth_all_10up_ex;
    run;
    * analysis;
    proc means data=OUTDATA.lostjob_access;
        title 'access summary';
        var index_auto_all_15up_ex
            index_auto_all_10up_ex
            index_tran_all_10up_ex
            index_auto_all_5up_ex
            index_dist_all_1up_ex
            index_predtran_all_10up_ex
            index_predauto_all_10up_ex
            index_predboth_all_5up_ex
            index_predboth_all_10up_ex
            index_predboth_all_15up_ex
            indcs_predboth_pop16lab_10up_ex indcs_auto_pop16lab_10up_ex
            indcs_auto_all_10up_ex
            indcs_tran_all_10up_ex
            indcs_predboth_all_5up_ex
            indcs_predboth_all_10up_ex
            indcs_predboth_all_15up_ex
            indcs_predauto_all_10up_ex
            indcs_predtran_all_10up_ex
            index_auto_scs_10up_ex
            index_auto_sco_10up_ex
            indcs_auto_scsa_10up_ex
            indcs_auto_scoa_10up_ex
            indcs_auto_scss_10up_ex
            indcs_auto_scso_10up_ex
            indcs_auto_scos_10up_ex
            indcs_auto_scoo_10up_ex
            index_tran_scs_10up_ex
            index_tran_sco_10up_ex
            indcs_tran_scsa_10up_ex
            indcs_tran_scoa_10up_ex
            indcs_tran_scss_10up_ex
            indcs_tran_scso_10up_ex
            indcs_tran_scos_10up_ex
            indcs_tran_scoo_10up_ex
            index_predboth_scs_10up_ex
            index_predboth_sco_10up_ex
            indcs_predboth_scsa_10up_ex
            indcs_predboth_scoa_10up_ex
            indcs_predboth_scss_10up_ex
            indcs_predboth_scso_10up_ex
            indcs_predboth_scos_10up_ex
            indcs_predboth_scoo_10up_ex
            /* group */
            index_auto_gps_10up_ex
            index_auto_gpo_10up_ex
            indcs_auto_gpsa_10up_ex
            indcs_auto_gpoa_10up_ex
            indcs_auto_gpss_10up_ex
            indcs_auto_gpos_10up_ex
            indcs_auto_gpas_10up_ex
            indcs_auto_gpao_10up_ex
            index_tran_gps_10up_ex
            index_tran_gpo_10up_ex
            indcs_tran_gpsa_10up_ex
            indcs_tran_gpoa_10up_ex
            indcs_tran_gpss_10up_ex
            indcs_tran_gpos_10up_ex
            indcs_tran_gpas_10up_ex
            indcs_tran_gpao_10up_ex
            index_predboth_gps_10up_ex
            index_predboth_gpo_10up_ex
            indcs_predboth_gpsa_10up_ex
            indcs_predboth_gpoa_10up_ex
            indcs_predboth_gpss_10up_ex
            indcs_predboth_gpos_10up_ex
            indcs_predboth_gpas_10up_ex
            indcs_predboth_gpao_10up_ex
            /* group dist */
            index_dist_all_3mi_ex
            index_dist_gps_3mi_ex
            index_dist_gpo_3mi_ex
            indcs_dist_all_3mi_ex
            indcs_dist_gpsa_3mi_ex
            indcs_dist_gpoa_3mi_ex
            indcs_dist_gpss_3mi_ex
            indcs_dist_gpos_3mi_ex
            indcs_dist_gpas_3mi_ex
            indcs_dist_gpao_3mi_ex
            /* other metrics */
            pauto ptran_avg ratio_tran_auto;
    run;
    * merge again with full data file in next step;

%mend lostjob_run5;
    
/* run step 6 --------------------------------------------------------------------------- */
%macro lostjob_run6;

    * initialize code macros that will be used below;
    %let metord = 1;
    * cycle through each city, choosing the appropriate state, state codes, and counties;
    %do %until("%scan(&metrolist.,&metord.)"="");
        %let met=%scan(&metrolist.,&metord.);
        %let st=%scan(&stlist.,&metord.);
        %let stmet=%scan(&stmetrolist.,&metord.);
        %let stcode=%scan(&stcodelist.,&metord.);
        %let cty=%scan(%quote(&countylist.),&metord.,V);
        %put  &met. &st. &cty.;

        proc append base=access_met
                    data=OUTDATA.access_&met. 
                        (keep=o_stfid year indxx_: rings_:);
        run;

        %let metord=%eval(&metord+1);
    %end;
    * add tract level data by year;
    proc sort data=access_met;
        by year o_stfid;
    run;
    data lostjob_national;
        set OUTDATA.lostjob_national;
        * auto to transit travel time ratio;
        if (sep_auto=0 or sep_auto=.) then sep_ratio_tran_auto=0;
        else sep_ratio_tran_auto=(sep_tran-sep_auto)/(0.5*(sep_tran+sep_auto));
    run;
    proc sort data=lostjob_national;
        by sep_year h_stfid;
    run;
    data lostjob_national;
        merge lostjob_national (in=in_job)
              access_met (in=in_access_otm rename=(o_stfid=h_stfid year=sep_year))
              ;
        by sep_year h_stfid;
        * only retain job records;
        if (in_job);
        * indicator for match to otm data;
        if (in_access_otm) then sample_acc=1;
        else sample_acc=0;
    run;
    * check if job access merge worked;
    proc freq data=lostjob_national;
        title 'evaluate merge of job access vars, by year and tract, should be no missing';
        table sep_year*sample_acc;
    run;
    * Particular geo merges:;
        * add identifier for whether tract is transit inaccessible;
        * add whether in central city census tract;
    proc sort data=lostjob_national;
        by h_stfid;
    run;
    data lostjob_national;
        merge lostjob_national (in=in_job)
              OUTDATA.proxmeassum_all
              (keep=h_stfid tract_transit_mpo tract_transit_census)
              GEO.central_tract (in=in_central keep=h_stfid central_tract stplcname2)
              ;
        by h_stfid;
        * only retain job records;
        if (in_job);
        * fix missing values;
        if (central_tract=.) then central_tract=0;
        if (tract_transit_mpo=.) then tract_transit_mpo=0;
        if (tract_transit_census=.) then tract_transit_census=0;
    run;
    proc freq data=lostjob_national;
        title "check central, and frequency of incidence of not having transit accessibility";
        table central_tract stplcname2 tract_transit_mpo*tract_transit_census /missing;
    run;
    * sort for quarter by quarter processing;
    proc sort data=lostjob_national;
        by pik sep_qtr h_stfid;
    run;

    * create analysis file;
    data lostjob_analysis (drop=e_mult_: e q earndomfull: earndomhire:
                    earntothire0 earntothire1 earntothire2 earntothire3 earntothire4
                    earntothire5 earntothire6 earntothire7 earntothire8);
        length e_mult_00 e_mult_50 e_mult_75 e_mult_90 3.
               surv_earn_00 surv_earn_50 surv_earn_75 surv_earn_90 3.
               hire_earn_00 hire_earn_50 hire_earn_75 hire_earn_90 3.
               qtr_hire_earn_00 qtr_hire_earn_50 qtr_hire_earn_75 qtr_hire_earn_90 3.
               surv_earndom_00 surv_earndom_50 surv_earndom_75 surv_earndom_90 3.
               hire_earndom_00 hire_earndom_50 hire_earndom_75 hire_earndom_90 3.
               qtr_hire_earndom_00 qtr_hire_earndom_50 qtr_hire_earndom_75 qtr_hire_earndom_90 3.
               surv_earnfull_00 surv_earnfull_50 surv_earnfull_75 surv_earnfull_90 3.
               hire_earnfull_00 hire_earnfull_50 hire_earnfull_75 hire_earnfull_90 3.
               qtr_hire_earnfull_00 qtr_hire_earnfull_50 qtr_hire_earnfull_75 qtr_hire_earnfull_90 3.
               seindomhirebest_00 seindomhirebest_50 seindomhirebest_75 seindomhirebest_90 $12
               earntothire 5.
            ;
        set lostjob_national;
        * e indicates 4 earnings ranges under consideration, percent previous earnings;
        array e_mult_array(4) e_mult_00--e_mult_90;
        array surv_earn_array(4) surv_earn_00--surv_earn_90;
        array hire_earn_array(4) hire_earn_00--hire_earn_90;
        array qtr_hire_earn_array(4) qtr_hire_earn_00--qtr_hire_earn_90;
        array surv_earndom_array(4) surv_earndom_00--surv_earndom_90;
        array hire_earndom_array(4) hire_earndom_00--hire_earndom_90;
        array qtr_hire_earndom_array(4) qtr_hire_earndom_00--qtr_hire_earndom_90;
        array surv_earnfull_array(4) surv_earnfull_00--surv_earnfull_90;
        array hire_earnfull_array(4) hire_earnfull_00--hire_earnfull_90;
        array qtr_hire_earnfull_array(4) qtr_hire_earnfull_00--qtr_hire_earnfull_90;
        array seindomhirebest_array(4) seindomhirebest_00--seindomhirebest_90;
        * set initial values;        
        do e=1 to 4;
            * input cutoff level multipliers;
            e_mult_array(e)=scan(&earn_mult.,e,' ');
            * are they still not hired to a certain level;
            surv_earn_array(e)=0;
            surv_earndom_array(e)=0;
            surv_earnfull_array(e)=0;
            * have they been hired, and do they earn over a share of their prior income;
            hire_earn_array(e)=0;
            hire_earndom_array(e)=0;
            hire_earnfull_array(e)=0;
            * what quarter first hired in;
            qtr_hire_earn_array(e)=%eval(&sep_qtr_span.+1);
            qtr_hire_earndom_array(e)=%eval(&sep_qtr_span.+1);
            qtr_hire_earnfull_array(e)=%eval(&sep_qtr_span.+1);
        end;
        * pull data from quarters to evaluate hiring;
        array earntothire_array(%eval(&sep_qtr_span.+1)) earntothire0-earntothire%eval(&sep_qtr_span.);
        array earndomhire_array(%eval(&sep_qtr_span.+1)) earndomhire0-earndomhire%eval(&sep_qtr_span.);
        array earndomhireb_array(%eval(&sep_qtr_span.+1)) earndomhireb0-earndomhireb%eval(&sep_qtr_span.);
        array earndomfull_array(%eval(&sep_qtr_span.+1)) earndomfull0-earndomfull%eval(&sep_qtr_span.);
        array seindomhire_array(%eval(&sep_qtr_span.+1)) seindomhire0-seindomhire%eval(&sep_qtr_span.);
        array seindomhireb_array(%eval(&sep_qtr_span.+1)) seindomhireb0-seindomhireb%eval(&sep_qtr_span.);
        array seindomfull_array(%eval(&sep_qtr_span.+1)) seindomfull0-seindomfull%eval(&sep_qtr_span.);
        * go through quarters;
        earntothire=0;
        do q=1 to %eval(&sep_qtr_span.+1);
            * total earnings after separation;
            earntothire=earntothire+max(0,earntothire_array(q));
            qtr=q-1;
            do e=1 to 4;
                * for total earnings;
                * determine if eligible for first acheiving earnings level in this quarter;
                * hire variable retained from previous observation, so check it;
                if (hire_earn_array(e)=1) then surv_earn_array(e)=0;
                * not previously acheived level, so eligible;
                else do;
                    surv_earn_array(e)=1;
                    * check if first acheiving total earnings level in this quarter;
                    if (earntothire_array(q)>(earndom/4)*e_mult_array(e)) then do;
                        * mark as hired to level from now on;
                        hire_earn_array(e)=1;
                        * mark quarter in which acheived level;
                        qtr_hire_earn_array(e)=qtr;
                    end;
                end;
                * for dominant earnings
                * determine if eligible for first obtaining job with earnings level in this quarter;
                * must have job in this quarter
                * could have earnings level in next quarter if still have job;
                * hire variable retained from previous observation, so check it;
                if (hire_earndom_array(e)=1) then surv_earndom_array(e)=0;
                * not previously hired to that level;
                else do;
                    surv_earndom_array(e)=1;
                    * check if first obtained job with sufficient earnings;
                    * either in this quarter,;
                    * or, contingent on having the job now, having the earnings next quarter;
                    if (earndomhire_array(q)>(earndom/4)*e_mult_array(e)) then do;
                        hire_earndom_array(e)=1;
                        qtr_hire_earndom_array(e)=qtr;
                        seindomhirebest_array(e)=seindomhire_array(q);
                    end;
                    else if (earndomhireb_array(q)>(earndom/4)*e_mult_array(e)) then do;
                        hire_earndom_array(e)=1;
                        qtr_hire_earndom_array(e)=qtr;
                        seindomhirebest_array(e)=seindomhireb_array(q);
                    end;
                end;
                * for full quarter job earnings;
                * determine if eligible for first acheiving earnings level in this quarter;
                * hire variable retained from previous observation, so check it;
                if (hire_earnfull_array(e)=1) then surv_earnfull_array(e)=0;
                * not previously acheived level, so eligible;
                else do;
                    surv_earnfull_array(e)=1;
                    * check if first acheiving total full quarter earnings level in this quarter;
                    if (earndomfull_array(q)>(earndom/4)*e_mult_array(e)) then do;
                        * mark as hired to level from now on;
                        hire_earnfull_array(e)=1;
                        * mark quarter in which acheived level;
                        qtr_hire_earnfull_array(e)=qtr;
                    end;
                end;
            end;
        * create record for each quarter;
        end;        
        * only the last quarter is accurate for qtr_hire_earn fields;
        if (qtr=%eval(&sep_qtr_span.));
        * earnings;
        earn_c=0;
        if (earntotall>=15000 and earntotall<20000) then earn_c=1;
        if (earntotall>=20000 and earntotall<25000) then earn_c=2;
        if (earntotall>=25000 and earntotall<30000) then earn_c=3;
        if (earntotall>=30000 and earntotall<35000) then earn_c=4;
        if (earntotall>=35000 and earntotall<=40000) then earn_c=5;
        earndomshare=earndom/earntotall;
        earndomshare_c=0;
        if (earndomshare>=0.5 and earndomshare<0.75) then earndomshare_c=1;
        if (earndomshare>=0.75 and earndomshare<1.0) then earndomshare_c=2;
        if (earndomshare=1) then earndomshare_c=3;
        * only add spouse earnings if married;
        if (married=0) then earntotallhu=earntotall;
        else earntotallhu=sum(0, earntotall, earntotallco);
        * take log of earnings;
        earndomln = log(earndom);
        earntotallln = log(earntotall);
        earntotallhuln = log(earntotallhu);
        earndomsepln= log(earndom_m0);
        earndom_sep_qtr=earndom_m0/(earndom/4);
        * geo;
        met_central=0;
        if (substr(h_stfid,1,5)='18097' or
            substr(h_stfid,1,5)='17031' or
            substr(h_stfid,1,5)='26163' or
            substr(h_stfid,1,5)='55079' or
            substr(h_stfid,1,5)='39049' or
            substr(h_stfid,1,5)='39035' or
            substr(h_stfid,1,5)='27053' or
            substr(h_stfid,1,5)='27123' or
            substr(h_stfid,1,5)='36029' or
            substr(h_stfid,1,5)='42003') then met_central=1;
        * fix some discrepencies;
        if (met_central=0 and central_tract=1) then central_tract=0;
        * travel time to previous job;
        sep_auto_c=0;
        if (sep_auto<=20) then sep_auto_c=1;
        if (sep_auto>20 and sep_auto<=40) then sep_auto_c=2;
        if (sep_auto>40) then sep_auto_c=3;
    run;

    * merge in job access data;
    data OUTDATA.lostjob_analysis;
        merge lostjob_analysis (in=in_jobs)
              OUTDATA.lostjob_access (in=in_access);
        by pik sep_qtr h_stfid;
        if ((in_jobs) and (in_access));
    run;

    * analysis of job search outcomes;
    proc freq data=OUTDATA.lostjob_analysis;
        title 'job search outcomes';
        table qtr_hire_earn_00 qtr_hire_earn_90 qtr_hire_earndom_90;
    run;
    * analysis of total variation;
    proc means data=OUTDATA.lostjob_analysis;
        title 'within metro access summary, total variation';
        class stmet_name;
        var index_predboth_all_10up_ex;
    run;
    * analysis of between tract variation;
    proc summary data=OUTDATA.lostjob_analysis;
        class h_stfid;
        var index_predboth_all_10up_ex;
        id stmet_name;
        output out=tract_sum (drop=_freq_) mean=;
    run;
    proc means data=tract_sum;
        title 'within metro access summary, between variation';
        class stmet_name;
        var index_predboth_all_10up_ex;
    run;
    * tract by tract summary;
    proc means data=OUTDATA.lostjob_analysis;
        title 'within tract (of person) access summary';
        class h_stfid;
        var index_predboth_all_10up_ex;
    run;

%mend lostjob_run6;

/* run step 7 --------------------------------------------------------------------------- */
%macro lostjob_run7;
    
    * export to csv, may require a sas session, rather than nohup;
    proc export data=OUTDATA.lostjob_analysis
        outfile="&dirdatavint./lostjob_analysis.csv"
        dbms=csv
        replace;
    run; 

    * test version;
    data lostjob_analysis_test;
        set OUTDATA.lostjob_analysis (obs=1000);
    run;
    * export to csv, may require a sas session, rather than nohup;
    proc export data=lostjob_analysis_test
        outfile="&dirdatavint./lostjob_analysis_test.csv"
        dbms=csv
        replace;
    run;        

%mend lostjob_run7;
    
/* run step 8 --------------------------------------------------------------------------- */
%macro lostjob_run8;
    * Disclosure protection;

    * macro for measuring cell sizes;
    %macro cellsizevar(uniqvar,tabvar);
        * identify unique observations within cell;
        proc sql;
            create table uniqcount as
            select &tabvar.,
                   &uniqvar.,
            count(one) as uniq_count
            from OUTDATA.lostjob_analysis (where=(qtr=8))
            group by &tabvar.,
                     &uniqvar.;
        quit;
        * count unique observations;
        proc means data=uniqcount nway;
            class &tabvar.
                  ;
            var uniq_count;
            output out=uniqcountsum (drop=_TYPE_ _FREQ_) n=;
        run;
        data uniqcountsum (keep=tabvar tabval uniq_count);
            length tabvar $25 tabval 3.;
            retain tabvar tabval;
            set uniqcountsum;
            tabvar="&tabvar.";
            tabval=&tabvar.;
        run;
        proc append base=DISC.&uniqvar._uniq_var data=uniqcountsum;
        run;
    %mend cellsizevar;
    
    * delete old disclosure files;
    proc datasets library=DISC;
       delete seindomhirebest_00_uniq_var;
    run;
    proc datasets library=DISC;
       delete seindomhirebest_75_uniq_var;
    run;
    proc datasets library=DISC;
       delete seindom_uniq_var;
    run;
    proc datasets library=DISC;
       delete pik_uniq_var;
    run;
        
    * transition tables;
    %cellsizevar(seindomhirebest_00,qtr_hire_earndom_00);
    %cellsizevar(seindomhirebest_75,qtr_hire_earndom_75);
    
    * summary stats tables and dummy vars, by employer separated from;
    %cellsizevar(seindom,qtr_hire_earndom_00);
    %cellsizevar(seindom,qtr_hire_earndom_75);
    %cellsizevar(seindom,female);
    %cellsizevar(seindom,grp_c);
    %cellsizevar(seindom,age_c);
    %cellsizevar(seindom,married);
    %cellsizevar(seindom,huearners);
    %cellsizevar(seindom,pikcnt);
    %cellsizevar(seindom,earn_c);
    %cellsizevar(seindom,earndomshare_c);
    %cellsizevar(seindom,sep_prox);
    %cellsizevar(seindom,sep_auto_c);
    %cellsizevar(seindom,tenuredom);
    %cellsizevar(seindom,sep_sec_c);
    %cellsizevar(seindom,sep_year);
    %cellsizevar(seindom,sep_seas);
    %cellsizevar(seindom,stmet_name_c);
    
    * summary stats tables and dummy vars, by job searcher;
    %cellsizevar(pik,qtr_hire_earndom_00);
    %cellsizevar(pik,qtr_hire_earndom_75);
    %cellsizevar(pik,female);
    %cellsizevar(pik,grp_c);
    %cellsizevar(pik,age_c);
    %cellsizevar(pik,married);
    %cellsizevar(pik,huearners);
    %cellsizevar(pik,pikcnt);
    %cellsizevar(pik,earn_c);
    %cellsizevar(pik,earndomshare_c);
    %cellsizevar(pik,sep_prox);
    %cellsizevar(pik,sep_auto_c);
    %cellsizevar(pik,tenuredom);
    %cellsizevar(pik,sep_sec_c);
    %cellsizevar(pik,sep_year);
    %cellsizevar(pik,sep_seas);
    %cellsizevar(pik,stmet_name_c);
        
* disclosure analysis;
%mend lostjob_run8;
    
























